{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1042.0, "completions/mean_length": 538.2380981445312, "completions/min_length": 296.0, "entropy/max": 0.94140625, "entropy/mean": 0.515625, "entropy/min": 0.2197265625, "epoch": 0.001, "grad_norm": 1.4225373801095518, "kl": 0.0, "learning_rate": 2e-07, "loss": 2.4977188672892225e-07, "memory(GiB)": 122.13, "reward": 1.0490549802780151, "reward_std": 0.27390730381011963, "rewards/EvidenceFormat/mean": 0.6547619104385376, "rewards/EvidenceFormat/std": 0.23395057022571564, "rewards/EvidenceHallucination/mean": 0.03694198280572891, "rewards/EvidenceHallucination/std": 0.13737311959266663, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 1.4362164735794067, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2142857164144516, "rewards/VideoAccuracy/std": 0.41529974341392517, "step": 1, "train_speed(iter/s)": 0.007082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 572.7857055664062, "completions/min_length": 316.0, "entropy/max": 1.3671875, "entropy/mean": 0.57421875, "entropy/min": 0.22265625, "epoch": 0.002, "grad_norm": 1.43247077891897, "kl": 0.0, "learning_rate": 4e-07, "loss": 2.7531672230907134e-07, "memory(GiB)": 144.56, "reward": 1.0515960454940796, "reward_std": 0.3841308057308197, "rewards/EvidenceFormat/mean": 0.6785714626312256, "rewards/EvidenceFormat/std": 0.3088418245315552, "rewards/EvidenceHallucination/mean": 0.04964689537882805, "rewards/EvidenceHallucination/std": 0.12500926852226257, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 5.710444450378418, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2142857164144516, "rewards/VideoAccuracy/std": 0.41529974341392517, "step": 2, "train_speed(iter/s)": 0.009314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1034.0, "completions/mean_length": 503.4761962890625, "completions/min_length": 181.0, "entropy/max": 1.0390625, "entropy/mean": 0.447265625, "entropy/min": 0.111328125, "epoch": 0.003, "grad_norm": 1.2979495807156205, "kl": 0.00079345703125, "learning_rate": 6e-07, "loss": 8.431219612248242e-06, "memory(GiB)": 144.91, "reward": 1.3539303541183472, "reward_std": 0.45905885100364685, "rewards/EvidenceFormat/mean": 0.7976190447807312, "rewards/EvidenceFormat/std": 0.27183935046195984, "rewards/EvidenceHallucination/mean": 0.09251835197210312, "rewards/EvidenceHallucination/std": 0.19499725103378296, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 1.6952828168869019, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4366172254085541, "rewards/VideoAccuracy/std": 0.48768168687820435, "step": 3, "train_speed(iter/s)": 0.011757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/mean_length": 462.3333435058594, "completions/min_length": 281.0, "entropy/max": 0.50390625, "entropy/mean": 0.359375, "entropy/min": 0.2177734375, "epoch": 0.004, "grad_norm": 1.3667239353418728, "kl": 0.000659942626953125, "learning_rate": 8e-07, "loss": 6.982259037613403e-06, "memory(GiB)": 144.91, "reward": 1.2883940935134888, "reward_std": 0.2476130723953247, "rewards/EvidenceFormat/mean": 0.761904776096344, "rewards/EvidenceFormat/std": 0.33564817905426025, "rewards/EvidenceHallucination/mean": 0.0918847993016243, "rewards/EvidenceHallucination/std": 0.17514179646968842, "rewards/Evidence_Num_Record/mean": 2.6190476417541504, "rewards/Evidence_Num_Record/std": 1.2869397401809692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.523809552192688, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.28430283069610596, "rewards/VideoAccuracy/std": 0.4535316228866577, "step": 4, "train_speed(iter/s)": 0.014002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 665.6666870117188, "completions/min_length": 343.0, "entropy/max": 1.609375, "entropy/mean": 0.54296875, "entropy/min": 0.1865234375, "epoch": 0.005, "grad_norm": 1.245853661293062, "kl": 0.000640869140625, "learning_rate": 1e-06, "loss": 7.0298005994118284e-06, "memory(GiB)": 144.91, "reward": 0.9924663305282593, "reward_std": 0.2152191400527954, "rewards/EvidenceFormat/mean": 0.75, "rewards/EvidenceFormat/std": 0.27607882022857666, "rewards/EvidenceHallucination/mean": 0.013307266868650913, "rewards/EvidenceHallucination/std": 0.06059669703245163, "rewards/Evidence_Num_Record/mean": 4.5, "rewards/Evidence_Num_Record/std": 4.289806842803955, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500061869621277, "rewards/VideoAccuracy/mean": 0.07432877272367477, "rewards/VideoAccuracy/std": 0.1967284381389618, "step": 5, "train_speed(iter/s)": 0.013814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/mean_length": 414.7857360839844, "completions/min_length": 125.0, "entropy/max": 0.52734375, "entropy/mean": 0.384765625, "entropy/min": 0.2041015625, "epoch": 0.006, "grad_norm": 1.7852011276259492, "kl": 0.000865936279296875, "learning_rate": 1.2e-06, "loss": 9.149313882517163e-06, "memory(GiB)": 144.91, "reward": 1.2794909477233887, "reward_std": 0.29543742537498474, "rewards/EvidenceFormat/mean": 0.5833333730697632, "rewards/EvidenceFormat/std": 0.3477570414543152, "rewards/EvidenceHallucination/mean": 0.05816828832030296, "rewards/EvidenceHallucination/std": 0.1828164905309677, "rewards/Evidence_Num_Record/mean": 2.0238096714019775, "rewards/Evidence_Num_Record/std": 1.092950701713562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4761904776096344, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 6, "train_speed(iter/s)": 0.014329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 741.0, "completions/mean_length": 420.3095397949219, "completions/min_length": 265.0, "entropy/max": 0.62890625, "entropy/mean": 0.361328125, "entropy/min": 0.166015625, "epoch": 0.007, "grad_norm": 1.4338167561294133, "kl": 0.00122833251953125, "learning_rate": 1.4e-06, "loss": 1.2808613064407837e-05, "memory(GiB)": 144.91, "reward": 1.4019895792007446, "reward_std": 0.419267475605011, "rewards/EvidenceFormat/mean": 0.738095223903656, "rewards/EvidenceFormat/std": 0.3863224685192108, "rewards/EvidenceHallucination/mean": 0.143430694937706, "rewards/EvidenceHallucination/std": 0.22957825660705566, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 1.4109246730804443, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2142857164144516, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.4613986015319824, "rewards/VideoAccuracy/std": 0.44075724482536316, "step": 7, "train_speed(iter/s)": 0.015176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 571.1666870117188, "completions/min_length": 261.0, "entropy/max": 0.6953125, "entropy/mean": 0.353515625, "entropy/min": 0.1552734375, "epoch": 0.008, "grad_norm": 1.1848593304684465, "kl": 0.00173187255859375, "learning_rate": 1.6e-06, "loss": 1.9516264728736132e-05, "memory(GiB)": 144.91, "reward": 1.365936517715454, "reward_std": 0.3504869341850281, "rewards/EvidenceFormat/mean": 0.8809524178504944, "rewards/EvidenceFormat/std": 0.2661725878715515, "rewards/EvidenceHallucination/mean": 0.0939042791724205, "rewards/EvidenceHallucination/std": 0.21233110129833221, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 3.964124917984009, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.5952380895614624, "rewards/HonestTime/std": 0.49679574370384216, "rewards/VideoAccuracy/mean": 0.2995365560054779, "rewards/VideoAccuracy/std": 0.3632737100124359, "step": 8, "train_speed(iter/s)": 0.015084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1694.0, "completions/mean_length": 485.9285888671875, "completions/min_length": 229.0, "entropy/max": 1.3984375, "entropy/mean": 0.453125, "entropy/min": 0.1259765625, "epoch": 0.009, "grad_norm": 1.5868900531869705, "kl": 0.0174560546875, "learning_rate": 1.8e-06, "loss": 0.00018728773284237832, "memory(GiB)": 144.91, "reward": 1.1515543460845947, "reward_std": 0.39280664920806885, "rewards/EvidenceFormat/mean": 0.8452380895614624, "rewards/EvidenceFormat/std": 0.25870442390441895, "rewards/EvidenceHallucination/mean": 0.07324790209531784, "rewards/EvidenceHallucination/std": 0.23180602490901947, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 3.1018333435058594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2142857164144516, "rewards/VideoAccuracy/std": 0.4152997136116028, "step": 9, "train_speed(iter/s)": 0.015322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 474.0476379394531, "completions/min_length": 254.0, "entropy/max": 0.7109375, "entropy/mean": 0.412109375, "entropy/min": 0.1103515625, "epoch": 0.01, "grad_norm": 1.3485732635442316, "kl": 0.004730224609375, "learning_rate": 2e-06, "loss": 4.4209620682522655e-05, "memory(GiB)": 144.91, "reward": 1.1982691287994385, "reward_std": 0.35777562856674194, "rewards/EvidenceFormat/mean": 0.9285714626312256, "rewards/EvidenceFormat/std": 0.26066118478775024, "rewards/EvidenceHallucination/mean": 0.06569261848926544, "rewards/EvidenceHallucination/std": 0.20155726373195648, "rewards/Evidence_Num_Record/mean": 2.6190476417541504, "rewards/Evidence_Num_Record/std": 1.513393521308899, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.23274962604045868, "rewards/VideoAccuracy/std": 0.3488112688064575, "step": 10, "train_speed(iter/s)": 0.015311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1539.0, "completions/mean_length": 598.3809814453125, "completions/min_length": 327.0, "entropy/max": 0.5703125, "entropy/mean": 0.326171875, "entropy/min": 0.150390625, "epoch": 0.011, "grad_norm": 1.2083434940567575, "kl": 0.00131988525390625, "learning_rate": 1.9999949650055508e-06, "loss": 1.3374148693401366e-05, "memory(GiB)": 144.94, "reward": 1.456288456916809, "reward_std": 0.40123605728149414, "rewards/EvidenceFormat/mean": 0.8333333730697632, "rewards/EvidenceFormat/std": 0.37719547748565674, "rewards/EvidenceHallucination/mean": 0.08484998345375061, "rewards/EvidenceHallucination/std": 0.19040553271770477, "rewards/Evidence_Num_Record/mean": 2.9285714626312256, "rewards/Evidence_Num_Record/std": 1.4715656042099, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.38931843638420105, "rewards/VideoAccuracy/std": 0.48434221744537354, "step": 11, "train_speed(iter/s)": 0.015617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/mean_length": 573.7142944335938, "completions/min_length": 328.0, "entropy/max": 1.7578125, "entropy/mean": 0.6953125, "entropy/min": 0.216796875, "epoch": 0.012, "grad_norm": 1.3300925852204148, "kl": 0.004669189453125, "learning_rate": 1.9999798600729064e-06, "loss": 4.8115143727045506e-05, "memory(GiB)": 144.94, "reward": 1.4905434846878052, "reward_std": 0.4766744375228882, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19081245362758636, "rewards/EvidenceHallucination/std": 0.23859429359436035, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 2.3507728576660156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4523809552192688, "rewards/VideoAccuracy/std": 0.503760576248169, "step": 12, "train_speed(iter/s)": 0.015851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09523809523809523, "completions/max_length": 2625.0, "completions/mean_length": 737.5, "completions/min_length": 278.0, "entropy/max": 2.890625, "entropy/mean": 0.56640625, "entropy/min": 0.1083984375, "epoch": 0.013, "grad_norm": 1.3719241393661707, "kl": 0.013916015625, "learning_rate": 1.9999546853541726e-06, "loss": 0.00014006666606292129, "memory(GiB)": 144.94, "reward": 1.2101057767868042, "reward_std": 0.4757480025291443, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.09935478866100311, "rewards/EvidenceHallucination/std": 0.226935476064682, "rewards/Evidence_Num_Record/mean": 6.023809432983398, "rewards/Evidence_Num_Record/std": 8.721960067749023, "rewards/Format/mean": 0.9047619104385376, "rewards/Format/std": 0.297101765871048, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.23785389959812164, "rewards/VideoAccuracy/std": 0.43064427375793457, "step": 13, "train_speed(iter/s)": 0.01551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 749.0, "completions/mean_length": 433.3571472167969, "completions/min_length": 252.0, "entropy/max": 0.60546875, "entropy/mean": 0.390625, "entropy/min": 0.248046875, "epoch": 0.014, "grad_norm": 1.3206631905316553, "kl": 0.0034637451171875, "learning_rate": 1.9999194411028592e-06, "loss": 3.436854967731051e-05, "memory(GiB)": 144.94, "reward": 1.7792860269546509, "reward_std": 0.19403386116027832, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.22832076251506805, "rewards/EvidenceHallucination/std": 0.28095993399620056, "rewards/Evidence_Num_Record/mean": 2.6190476417541504, "rewards/Evidence_Num_Record/std": 0.7948732376098633, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.6217170357704163, "rewards/VideoAccuracy/std": 0.5551446676254272, "step": 14, "train_speed(iter/s)": 0.016096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1776.0, "completions/mean_length": 582.1428833007812, "completions/min_length": 353.0, "entropy/max": 1.90625, "entropy/mean": 0.55078125, "entropy/min": 0.1572265625, "epoch": 0.015, "grad_norm": 1.1286443819444592, "kl": 0.003997802734375, "learning_rate": 1.9998741276738752e-06, "loss": 4.373884803499095e-05, "memory(GiB)": 144.94, "reward": 1.4701656103134155, "reward_std": 0.29070937633514404, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19159749150276184, "rewards/EvidenceHallucination/std": 0.29467591643333435, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 2.836320638656616, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.36517950892448425, "rewards/VideoAccuracy/std": 0.4534320533275604, "step": 15, "train_speed(iter/s)": 0.016157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 638.0, "completions/mean_length": 438.5, "completions/min_length": 250.0, "entropy/max": 0.625, "entropy/mean": 0.3984375, "entropy/min": 0.251953125, "epoch": 0.016, "grad_norm": 1.156287440614561, "kl": 0.00811767578125, "learning_rate": 1.9998187455235257e-06, "loss": 8.322580833919346e-05, "memory(GiB)": 144.94, "reward": 1.3306825160980225, "reward_std": 0.251572847366333, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.10579296946525574, "rewards/EvidenceHallucination/std": 0.21332131326198578, "rewards/Evidence_Num_Record/mean": 2.738095283508301, "rewards/Evidence_Num_Record/std": 0.9642266035079956, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 16, "train_speed(iter/s)": 0.015815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 568.7380981445312, "completions/min_length": 274.0, "entropy/max": 0.8359375, "entropy/mean": 0.380859375, "entropy/min": 0.1259765625, "epoch": 0.017, "grad_norm": 1.4101739983074033, "kl": 0.0037384033203125, "learning_rate": 1.999753295209509e-06, "loss": 3.897860005963594e-05, "memory(GiB)": 145.69, "reward": 1.7066527605056763, "reward_std": 0.3545313775539398, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1598617285490036, "rewards/EvidenceHallucination/std": 0.2706254720687866, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 6.809205055236816, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6318230628967285, "rewards/VideoAccuracy/std": 0.5390254259109497, "step": 17, "train_speed(iter/s)": 0.016041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/mean_length": 587.9285888671875, "completions/min_length": 293.0, "entropy/max": 1.0, "entropy/mean": 0.384765625, "entropy/min": 0.1318359375, "epoch": 0.018, "grad_norm": 1.2248047866328504, "kl": 0.0038299560546875, "learning_rate": 1.999677777390909e-06, "loss": 3.880564327118918e-05, "memory(GiB)": 145.69, "reward": 1.6634336709976196, "reward_std": 0.4180944859981537, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22601404786109924, "rewards/EvidenceHallucination/std": 0.30248087644577026, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.9169965982437134, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.48965945839881897, "rewards/VideoAccuracy/std": 0.43758147954940796, "step": 18, "train_speed(iter/s)": 0.016317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/mean_length": 464.0952453613281, "completions/min_length": 233.0, "entropy/max": 0.7578125, "entropy/mean": 0.451171875, "entropy/min": 0.28125, "epoch": 0.019, "grad_norm": 1.279202550632437, "kl": 0.007568359375, "learning_rate": 1.999592192828189e-06, "loss": 7.748680945951492e-05, "memory(GiB)": 145.69, "reward": 1.440986156463623, "reward_std": 0.35693225264549255, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18112100660800934, "rewards/EvidenceHallucination/std": 0.2568683624267578, "rewards/Evidence_Num_Record/mean": 3.095238208770752, "rewards/Evidence_Num_Record/std": 0.9830148220062256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4047619104385376, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 19, "train_speed(iter/s)": 0.016439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 502.9761962890625, "completions/min_length": 258.0, "entropy/max": 1.484375, "entropy/mean": 0.46484375, "entropy/min": 0.10693359375, "epoch": 0.02, "grad_norm": 1.3709799718611808, "kl": 0.00537109375, "learning_rate": 1.999496542383185e-06, "loss": 5.439633605419658e-05, "memory(GiB)": 145.69, "reward": 1.2718842029571533, "reward_std": 0.4565136432647705, "rewards/EvidenceFormat/mean": 0.8690476417541504, "rewards/EvidenceFormat/std": 0.332388311624527, "rewards/EvidenceHallucination/mean": 0.11449707299470901, "rewards/EvidenceHallucination/std": 0.2324623316526413, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 4.580674648284912, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.32636573910713196, "rewards/VideoAccuracy/std": 0.42334258556365967, "step": 20, "train_speed(iter/s)": 0.016233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 834.0, "completions/mean_length": 527.3333129882812, "completions/min_length": 279.0, "entropy/max": 0.62890625, "entropy/mean": 0.337890625, "entropy/min": 0.2041015625, "epoch": 0.021, "grad_norm": 1.3578766651839085, "kl": 0.005828857421875, "learning_rate": 1.9993908270190957e-06, "loss": 6.010277138557285e-05, "memory(GiB)": 145.74, "reward": 2.1086716651916504, "reward_std": 0.2845557630062103, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.2609308958053589, "rewards/EvidenceHallucination/std": 0.28345152735710144, "rewards/Evidence_Num_Record/mean": 2.904762029647827, "rewards/Evidence_Num_Record/std": 1.0548268556594849, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9285714626312256, "rewards/HonestTime/std": 0.26066118478775024, "rewards/VideoAccuracy/mean": 0.8826761245727539, "rewards/VideoAccuracy/std": 0.339295893907547, "step": 21, "train_speed(iter/s)": 0.016293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/mean_length": 494.26190185546875, "completions/min_length": 311.0, "entropy/max": 1.5625, "entropy/mean": 0.59375, "entropy/min": 0.302734375, "epoch": 0.022, "grad_norm": 1.253736043947575, "kl": 0.00787353515625, "learning_rate": 1.9992750478004735e-06, "loss": 8.163228631019592e-05, "memory(GiB)": 145.74, "reward": 1.3902297019958496, "reward_std": 0.36313265562057495, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.10591033846139908, "rewards/EvidenceHallucination/std": 0.22784608602523804, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 2.2285237312316895, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.380952388048172, "rewards/VideoAccuracy/std": 0.4915074110031128, "step": 22, "train_speed(iter/s)": 0.016359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 735.0, "completions/mean_length": 442.5952453613281, "completions/min_length": 244.0, "entropy/max": 0.58203125, "entropy/mean": 0.412109375, "entropy/min": 0.228515625, "epoch": 0.023, "grad_norm": 1.3193304711961666, "kl": 0.01055908203125, "learning_rate": 1.999149205893214e-06, "loss": 0.00010886572999879718, "memory(GiB)": 145.74, "reward": 1.3971387147903442, "reward_std": 0.39149364829063416, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08098088949918747, "rewards/EvidenceHallucination/std": 0.20118315517902374, "rewards/Evidence_Num_Record/mean": 2.952380895614624, "rewards/Evidence_Num_Record/std": 0.9865530133247375, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.38094261288642883, "rewards/VideoAccuracy/std": 0.4571596384048462, "step": 23, "train_speed(iter/s)": 0.016467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 803.0, "completions/mean_length": 468.4761962890625, "completions/min_length": 305.0, "entropy/max": 0.5625, "entropy/mean": 0.357421875, "entropy/min": 0.1640625, "epoch": 0.024, "grad_norm": 1.0831692513277686, "kl": 0.00848388671875, "learning_rate": 1.9990133025645437e-06, "loss": 8.65261972649023e-05, "memory(GiB)": 145.74, "reward": 1.7580333948135376, "reward_std": 0.19246236979961395, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.19589346647262573, "rewards/EvidenceHallucination/std": 0.2664523124694824, "rewards/Evidence_Num_Record/mean": 2.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7419721484184265, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5476190447807312, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.6212357878684998, "rewards/VideoAccuracy/std": 0.5090880990028381, "step": 24, "train_speed(iter/s)": 0.016795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/mean_length": 544.3333129882812, "completions/min_length": 307.0, "entropy/max": 1.09375, "entropy/mean": 0.53515625, "entropy/min": 0.228515625, "epoch": 0.025, "grad_norm": 1.1987348090196133, "kl": 0.00848388671875, "learning_rate": 1.998867339183008e-06, "loss": 8.686093497090042e-05, "memory(GiB)": 145.74, "reward": 1.6837431192398071, "reward_std": 0.20683422684669495, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22472654283046722, "rewards/EvidenceHallucination/std": 0.27752161026000977, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 1.2742421627044678, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5721309781074524, "rewards/VideoAccuracy/std": 0.4405735433101654, "step": 25, "train_speed(iter/s)": 0.016877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 483.4285888671875, "completions/min_length": 285.0, "entropy/max": 2.34375, "entropy/mean": 0.5234375, "entropy/min": 0.10595703125, "epoch": 0.026, "grad_norm": 1.2446589358643094, "kl": 0.01214599609375, "learning_rate": 1.998711317218456e-06, "loss": 0.0001250980276381597, "memory(GiB)": 145.74, "reward": 1.3574994802474976, "reward_std": 0.29396677017211914, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.1208304911851883, "rewards/EvidenceHallucination/std": 0.2214687168598175, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 4.737720489501953, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496562242507935, "step": 26, "train_speed(iter/s)": 0.016664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 909.0, "completions/mean_length": 478.0, "completions/min_length": 301.0, "entropy/max": 0.69921875, "entropy/mean": 0.396484375, "entropy/min": 0.2001953125, "epoch": 0.027, "grad_norm": 1.1043514885610453, "kl": 0.01055908203125, "learning_rate": 1.9985452382420274e-06, "loss": 0.00010822327749338001, "memory(GiB)": 145.74, "reward": 1.2988076210021973, "reward_std": 0.3214387893676758, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08240236341953278, "rewards/EvidenceHallucination/std": 0.2126241773366928, "rewards/Evidence_Num_Record/mean": 2.857142925262451, "rewards/Evidence_Num_Record/std": 1.1384934186935425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.2156604379415512, "rewards/VideoAccuracy/std": 0.42541253566741943, "step": 27, "train_speed(iter/s)": 0.016879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 611.857177734375, "completions/min_length": 318.0, "entropy/max": 2.25, "entropy/mean": 0.5390625, "entropy/min": 0.193359375, "epoch": 0.028, "grad_norm": 0.9921823880704781, "kl": 0.006317138671875, "learning_rate": 1.9983691039261353e-06, "loss": 6.70288791297935e-05, "memory(GiB)": 145.75, "reward": 1.7207821607589722, "reward_std": 0.23402933776378632, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24490571022033691, "rewards/EvidenceHallucination/std": 0.3444436490535736, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 3.415905237197876, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.5952380895614624, "rewards/HonestTime/std": 0.49679574370384216, "rewards/VideoAccuracy/mean": 0.5646581053733826, "rewards/VideoAccuracy/std": 0.4903152585029602, "step": 28, "train_speed(iter/s)": 0.016718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1589.0, "completions/mean_length": 520.952392578125, "completions/min_length": 302.0, "entropy/max": 1.8125, "entropy/mean": 0.546875, "entropy/min": 0.2451171875, "epoch": 0.029, "grad_norm": 1.1703832973302537, "kl": 0.01055908203125, "learning_rate": 1.998182916044451e-06, "loss": 0.00010844170901691541, "memory(GiB)": 145.75, "reward": 1.4587047100067139, "reward_std": 0.4036465585231781, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1506655514240265, "rewards/EvidenceHallucination/std": 0.22032076120376587, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 1.656998634338379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4285714328289032, "rewards/VideoAccuracy/std": 0.5008702874183655, "step": 29, "train_speed(iter/s)": 0.016683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.14285714285714285, "completions/max_length": 2625.0, "completions/mean_length": 753.0714721679688, "completions/min_length": 249.0, "entropy/max": 0.87890625, "entropy/mean": 0.384765625, "entropy/min": 0.07421875, "epoch": 0.03, "grad_norm": 1.2648430606097252, "kl": 0.2373046875, "learning_rate": 1.9979866764718843e-06, "loss": 0.0022539356723427773, "memory(GiB)": 145.75, "reward": 1.3076739311218262, "reward_std": 0.31140047311782837, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.09440181404352188, "rewards/EvidenceHallucination/std": 0.1834997832775116, "rewards/Evidence_Num_Record/mean": 7.476190567016602, "rewards/Evidence_Num_Record/std": 10.572025299072266, "rewards/Format/mean": 0.8571428656578064, "rewards/Format/std": 0.3541688024997711, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.360222190618515, "rewards/VideoAccuracy/std": 0.4654003381729126, "step": 30, "train_speed(iter/s)": 0.016483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 791.0, "completions/mean_length": 504.4761962890625, "completions/min_length": 305.0, "entropy/max": 0.546875, "entropy/mean": 0.322265625, "entropy/min": 0.1513671875, "epoch": 0.031, "grad_norm": 1.1330015201937758, "kl": 0.01275634765625, "learning_rate": 1.997780387184565e-06, "loss": 0.0001289858773816377, "memory(GiB)": 145.75, "reward": 2.1825642585754395, "reward_std": 0.20884746313095093, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2839708924293518, "rewards/EvidenceHallucination/std": 0.2942700684070587, "rewards/Evidence_Num_Record/mean": 3.1190476417541504, "rewards/Evidence_Num_Record/std": 0.8323454856872559, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.9352939128875732, "rewards/VideoAccuracy/std": 0.22921568155288696, "step": 31, "train_speed(iter/s)": 0.016511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2567.0, "completions/mean_length": 571.6190795898438, "completions/min_length": 273.0, "entropy/max": 1.5390625, "entropy/mean": 0.60546875, "entropy/min": 0.09423828125, "epoch": 0.032, "grad_norm": 0.9103830840092558, "kl": 0.01263427734375, "learning_rate": 1.997564050259824e-06, "loss": 0.00013467957614921033, "memory(GiB)": 145.75, "reward": 1.2602533102035522, "reward_std": 0.2785269618034363, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11079034209251404, "rewards/EvidenceHallucination/std": 0.22842200100421906, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 5.751180171966553, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2380952388048172, "rewards/VideoAccuracy/std": 0.43108054995536804, "step": 32, "train_speed(iter/s)": 0.016409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07142857142857142, "completions/max_length": 2625.0, "completions/mean_length": 679.0, "completions/min_length": 292.0, "entropy/max": 0.7421875, "entropy/mean": 0.34765625, "entropy/min": 0.09521484375, "epoch": 0.033, "grad_norm": 1.1513595208744345, "kl": 0.3828125, "learning_rate": 1.997337667876172e-06, "loss": 0.0021276986226439476, "memory(GiB)": 145.75, "reward": 1.185469388961792, "reward_std": 0.4486236274242401, "rewards/EvidenceFormat/mean": 0.8809524178504944, "rewards/EvidenceFormat/std": 0.32777005434036255, "rewards/EvidenceHallucination/mean": 0.11636475473642349, "rewards/EvidenceHallucination/std": 0.25253647565841675, "rewards/Evidence_Num_Record/mean": 4.857142925262451, "rewards/Evidence_Num_Record/std": 7.579083442687988, "rewards/Format/mean": 0.9285714626312256, "rewards/Format/std": 0.26066118478775024, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2574344873428345, "rewards/VideoAccuracy/std": 0.42979881167411804, "step": 33, "train_speed(iter/s)": 0.016317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/mean_length": 494.26190185546875, "completions/min_length": 267.0, "entropy/max": 1.1484375, "entropy/mean": 0.37890625, "entropy/min": 0.1552734375, "epoch": 0.034, "grad_norm": 1.0925109804515087, "kl": 0.018310546875, "learning_rate": 1.9971012423132772e-06, "loss": 0.00018710496078711003, "memory(GiB)": 145.75, "reward": 1.7562119960784912, "reward_std": 0.12298288941383362, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19665543735027313, "rewards/EvidenceHallucination/std": 0.269692063331604, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 1.064690351486206, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.5883094072341919, "rewards/VideoAccuracy/std": 0.4778609871864319, "step": 34, "train_speed(iter/s)": 0.016519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 869.0, "completions/mean_length": 510.452392578125, "completions/min_length": 327.0, "entropy/max": 1.046875, "entropy/mean": 0.58203125, "entropy/min": 0.1787109375, "epoch": 0.035, "grad_norm": 1.248908336888557, "kl": 0.01300048828125, "learning_rate": 1.9968547759519425e-06, "loss": 0.00013340359146241099, "memory(GiB)": 145.75, "reward": 1.4004958868026733, "reward_std": 0.34659308195114136, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1086525246500969, "rewards/EvidenceHallucination/std": 0.21830326318740845, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.8850939869880676, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.31209874153137207, "rewards/VideoAccuracy/std": 0.41593098640441895, "step": 35, "train_speed(iter/s)": 0.016603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 951.0, "completions/mean_length": 431.5714416503906, "completions/min_length": 267.0, "entropy/max": 0.56640625, "entropy/mean": 0.361328125, "entropy/min": 0.181640625, "epoch": 0.036, "grad_norm": 1.2278684525023777, "kl": 0.0172119140625, "learning_rate": 1.9965982712740806e-06, "loss": 0.0001762424799380824, "memory(GiB)": 145.75, "reward": 1.3322027921676636, "reward_std": 0.3921200931072235, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1133946031332016, "rewards/EvidenceHallucination/std": 0.22268490493297577, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 1.5151193141937256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 36, "train_speed(iter/s)": 0.01668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/mean_length": 410.76190185546875, "completions/min_length": 246.0, "entropy/max": 0.5390625, "entropy/mean": 0.330078125, "entropy/min": 0.166015625, "epoch": 0.037, "grad_norm": 1.210366128733837, "kl": 0.017578125, "learning_rate": 1.996331730862691e-06, "loss": 0.00018034478125628084, "memory(GiB)": 145.75, "reward": 1.5425349473953247, "reward_std": 0.16982388496398926, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2175394743680954, "rewards/EvidenceHallucination/std": 0.31620246171951294, "rewards/Evidence_Num_Record/mean": 2.952380895614624, "rewards/Evidence_Num_Record/std": 1.8206455707550049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.43236052989959717, "rewards/VideoAccuracy/std": 0.4914357364177704, "step": 37, "train_speed(iter/s)": 0.016778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 926.0, "completions/mean_length": 509.21429443359375, "completions/min_length": 295.0, "entropy/max": 0.8828125, "entropy/mean": 0.412109375, "entropy/min": 0.1279296875, "epoch": 0.038, "grad_norm": 1.0799898044921437, "kl": 0.01458740234375, "learning_rate": 1.996055157401834e-06, "loss": 0.00014850683510303497, "memory(GiB)": 145.75, "reward": 1.6375964879989624, "reward_std": 0.2798987627029419, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.15529486536979675, "rewards/EvidenceHallucination/std": 0.27660584449768066, "rewards/Evidence_Num_Record/mean": 3.2142858505249023, "rewards/Evidence_Num_Record/std": 0.8981204628944397, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4732043147087097, "rewards/VideoAccuracy/std": 0.43239283561706543, "step": 38, "train_speed(iter/s)": 0.016841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 691.0, "completions/mean_length": 413.73809814453125, "completions/min_length": 273.0, "entropy/max": 1.5234375, "entropy/mean": 0.51171875, "entropy/min": 0.16796875, "epoch": 0.039, "grad_norm": 1.360366339394654, "kl": 0.0220947265625, "learning_rate": 1.9957685536765995e-06, "loss": 0.00022746861213818192, "memory(GiB)": 145.75, "reward": 1.3803130388259888, "reward_std": 0.4381098747253418, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11585082858800888, "rewards/EvidenceHallucination/std": 0.1961347460746765, "rewards/Evidence_Num_Record/mean": 2.8809523582458496, "rewards/Evidence_Num_Record/std": 0.8611501455307007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496559262275696, "step": 39, "train_speed(iter/s)": 0.01702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 511.0, "completions/mean_length": 344.73809814453125, "completions/min_length": 247.0, "entropy/max": 0.88671875, "entropy/mean": 0.404296875, "entropy/min": 0.2314453125, "epoch": 0.04, "grad_norm": 1.4382404832091151, "kl": 0.0233154296875, "learning_rate": 1.9954719225730845e-06, "loss": 0.00023420357319992036, "memory(GiB)": 145.75, "reward": 1.3400732278823853, "reward_std": 0.3356240689754486, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.17973728477954865, "rewards/EvidenceHallucination/std": 0.31203538179397583, "rewards/Evidence_Num_Record/mean": 2.357142925262451, "rewards/Evidence_Num_Record/std": 0.5768471360206604, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.30412575602531433, "rewards/VideoAccuracy/std": 0.44932904839515686, "step": 40, "train_speed(iter/s)": 0.016963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 866.0, "completions/mean_length": 505.71429443359375, "completions/min_length": 285.0, "entropy/max": 0.5234375, "entropy/mean": 0.31640625, "entropy/min": 0.1474609375, "epoch": 0.041, "grad_norm": 1.2328074341784987, "kl": 0.0166015625, "learning_rate": 1.995165267078361e-06, "loss": 0.00016819580923765898, "memory(GiB)": 145.75, "reward": 1.7048735618591309, "reward_std": 0.33454629778862, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24886250495910645, "rewards/EvidenceHallucination/std": 0.35649824142456055, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.9833101630210876, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.4598630368709564, "rewards/VideoAccuracy/std": 0.45265597105026245, "step": 41, "train_speed(iter/s)": 0.017079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 678.0, "completions/mean_length": 404.4047546386719, "completions/min_length": 301.0, "entropy/max": 1.2421875, "entropy/mean": 0.58984375, "entropy/min": 0.2216796875, "epoch": 0.042, "grad_norm": 1.4974546558176582, "kl": 0.0262451171875, "learning_rate": 1.994848590280447e-06, "loss": 0.0002636639983393252, "memory(GiB)": 145.75, "reward": 1.4940990209579468, "reward_std": 0.4433921277523041, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20859037339687347, "rewards/EvidenceHallucination/std": 0.2495918869972229, "rewards/Evidence_Num_Record/mean": 2.904762029647827, "rewards/Evidence_Num_Record/std": 0.9055256247520447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4523809552192688, "rewards/VideoAccuracy/std": 0.503760576248169, "step": 42, "train_speed(iter/s)": 0.017094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 804.0, "completions/mean_length": 391.0952453613281, "completions/min_length": 230.0, "entropy/max": 0.51171875, "entropy/mean": 0.36328125, "entropy/min": 0.2001953125, "epoch": 0.043, "grad_norm": 1.178164764207004, "kl": 0.0277099609375, "learning_rate": 1.994521895368273e-06, "loss": 0.000281293730949983, "memory(GiB)": 145.75, "reward": 1.5839663743972778, "reward_std": 0.2064477503299713, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1908642202615738, "rewards/EvidenceHallucination/std": 0.2820035517215729, "rewards/Evidence_Num_Record/mean": 2.904762029647827, "rewards/Evidence_Num_Record/std": 1.0777013301849365, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5457934737205505, "rewards/VideoAccuracy/std": 0.5020904541015625, "step": 43, "train_speed(iter/s)": 0.017206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 640.0, "completions/mean_length": 396.23809814453125, "completions/min_length": 269.0, "entropy/max": 0.7578125, "entropy/mean": 0.376953125, "entropy/min": 0.189453125, "epoch": 0.044, "grad_norm": 1.480695163848795, "kl": 0.0230712890625, "learning_rate": 1.9941851856316543e-06, "loss": 0.00023624445020686835, "memory(GiB)": 145.75, "reward": 1.9166960716247559, "reward_std": 0.23344223201274872, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.32448601722717285, "rewards/EvidenceHallucination/std": 0.3242557942867279, "rewards/Evidence_Num_Record/mean": 2.6666667461395264, "rewards/Evidence_Num_Record/std": 0.6866910457611084, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.730370283126831, "rewards/VideoAccuracy/std": 0.39827626943588257, "step": 44, "train_speed(iter/s)": 0.01731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/mean_length": 414.5714416503906, "completions/min_length": 265.0, "entropy/max": 0.9453125, "entropy/mean": 0.451171875, "entropy/min": 0.1806640625, "epoch": 0.045, "grad_norm": 1.3572922073713911, "kl": 0.02294921875, "learning_rate": 1.993838464461254e-06, "loss": 0.0002286377566633746, "memory(GiB)": 145.75, "reward": 1.5818372964859009, "reward_std": 0.3606022596359253, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24391533434391022, "rewards/EvidenceHallucination/std": 0.3106663227081299, "rewards/Evidence_Num_Record/mean": 2.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7419722080230713, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.46638762950897217, "rewards/VideoAccuracy/std": 0.4565890431404114, "step": 45, "train_speed(iter/s)": 0.017377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 357.0952453613281, "completions/min_length": 270.0, "entropy/max": 0.796875, "entropy/mean": 0.3984375, "entropy/min": 0.2255859375, "epoch": 0.046, "grad_norm": 1.5353899061823615, "kl": 0.034912109375, "learning_rate": 1.9934817353485502e-06, "loss": 0.00035044411197304726, "memory(GiB)": 145.75, "reward": 1.6934123039245605, "reward_std": 0.4521592855453491, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.26005518436431885, "rewards/EvidenceHallucination/std": 0.3091502785682678, "rewards/Evidence_Num_Record/mean": 2.5238096714019775, "rewards/Evidence_Num_Record/std": 0.8333914279937744, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6414012908935547, "rewards/VideoAccuracy/std": 0.4838889539241791, "step": 46, "train_speed(iter/s)": 0.01738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 349.3571472167969, "completions/min_length": 237.0, "entropy/max": 0.5703125, "entropy/mean": 0.3515625, "entropy/min": 0.201171875, "epoch": 0.047, "grad_norm": 1.3887883228401308, "kl": 0.0296630859375, "learning_rate": 1.993115001885801e-06, "loss": 0.00029792386339977384, "memory(GiB)": 145.75, "reward": 1.5804702043533325, "reward_std": 0.2644038200378418, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20623116195201874, "rewards/EvidenceHallucination/std": 0.3177759349346161, "rewards/Evidence_Num_Record/mean": 2.6190476417541504, "rewards/Evidence_Num_Record/std": 0.7635724544525146, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4725571274757385, "rewards/VideoAccuracy/std": 0.4981401860713959, "step": 47, "train_speed(iter/s)": 0.017459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 972.0, "completions/mean_length": 460.0714416503906, "completions/min_length": 264.0, "entropy/max": 0.9609375, "entropy/mean": 0.404296875, "entropy/min": 0.189453125, "epoch": 0.048, "grad_norm": 1.3369684895102307, "kl": 0.02197265625, "learning_rate": 1.9927382677660083e-06, "loss": 0.00022233667550608516, "memory(GiB)": 145.75, "reward": 1.6147117614746094, "reward_std": 0.37551945447921753, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19136324524879456, "rewards/EvidenceHallucination/std": 0.29685333371162415, "rewards/Evidence_Num_Record/mean": 2.904762029647827, "rewards/Evidence_Num_Record/std": 0.9055256843566895, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.44310569763183594, "rewards/VideoAccuracy/std": 0.4097944498062134, "step": 48, "train_speed(iter/s)": 0.017496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 331.6428527832031, "completions/min_length": 245.0, "entropy/max": 1.1875, "entropy/mean": 0.470703125, "entropy/min": 0.2734375, "epoch": 0.049, "grad_norm": 1.4428138106775554, "kl": 0.03515625, "learning_rate": 1.992351536782881e-06, "loss": 0.0003579538897611201, "memory(GiB)": 145.75, "reward": 1.62581205368042, "reward_std": 0.28599339723587036, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27191683650016785, "rewards/EvidenceHallucination/std": 0.2856244444847107, "rewards/Evidence_Num_Record/mean": 2.261904716491699, "rewards/Evidence_Num_Record/std": 0.49679577350616455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5714285969734192, "rewards/VideoAccuracy/std": 0.5008703470230103, "step": 49, "train_speed(iter/s)": 0.017508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 756.0, "completions/mean_length": 333.69049072265625, "completions/min_length": 250.0, "entropy/max": 0.58203125, "entropy/mean": 0.380859375, "entropy/min": 0.236328125, "epoch": 0.05, "grad_norm": 1.473115622410385, "kl": 0.033447265625, "learning_rate": 1.991954812830795e-06, "loss": 0.00033629988320171833, "memory(GiB)": 145.75, "reward": 1.46311616897583, "reward_std": 0.3826943039894104, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21974819898605347, "rewards/EvidenceHallucination/std": 0.29454198479652405, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.2971017360687256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4191664755344391, "rewards/VideoAccuracy/std": 0.4755955636501312, "step": 50, "train_speed(iter/s)": 0.017564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 577.0, "completions/mean_length": 408.4761962890625, "completions/min_length": 272.0, "entropy/max": 0.4921875, "entropy/mean": 0.310546875, "entropy/min": 0.18359375, "epoch": 0.051, "grad_norm": 0.9285880403146588, "kl": 0.0279541015625, "learning_rate": 1.991548099904757e-06, "loss": 0.0006827338947914541, "memory(GiB)": 145.75, "reward": 1.8026541471481323, "reward_std": 0.11613352596759796, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30948394536972046, "rewards/EvidenceHallucination/std": 0.3458852767944336, "rewards/Evidence_Num_Record/mean": 2.642857074737549, "rewards/Evidence_Num_Record/std": 0.5328903794288635, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5407573580741882, "rewards/VideoAccuracy/std": 0.4804791510105133, "step": 51, "train_speed(iter/s)": 0.017656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/mean_length": 340.8571472167969, "completions/min_length": 240.0, "entropy/max": 1.109375, "entropy/mean": 0.5703125, "entropy/min": 0.25, "epoch": 0.052, "grad_norm": 1.571728483796054, "kl": 0.046630859375, "learning_rate": 1.991131402100361e-06, "loss": 0.0004689935012720525, "memory(GiB)": 145.75, "reward": 1.7956736087799072, "reward_std": 0.3757343888282776, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40693891048431396, "rewards/EvidenceHallucination/std": 0.2946487367153168, "rewards/Evidence_Num_Record/mean": 2.3333334922790527, "rewards/Evidence_Num_Record/std": 0.5702658891677856, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7142857313156128, "rewards/VideoAccuracy/std": 0.45722997188568115, "step": 52, "train_speed(iter/s)": 0.017687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 518.0, "completions/mean_length": 345.8809509277344, "completions/min_length": 274.0, "entropy/max": 0.6484375, "entropy/mean": 0.369140625, "entropy/min": 0.2265625, "epoch": 0.053, "grad_norm": 1.1307122156401865, "kl": 0.036865234375, "learning_rate": 1.9907047236137496e-06, "loss": 0.00037122820504009724, "memory(GiB)": 145.75, "reward": 1.574357509613037, "reward_std": 0.19801990687847137, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.26589876413345337, "rewards/EvidenceHallucination/std": 0.3213001489639282, "rewards/Evidence_Num_Record/mean": 2.1190476417541504, "rewards/Evidence_Num_Record/std": 0.32777008414268494, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5211776494979858, "rewards/VideoAccuracy/std": 0.5030077695846558, "step": 53, "train_speed(iter/s)": 0.017759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 867.0, "completions/mean_length": 401.66668701171875, "completions/min_length": 270.0, "entropy/max": 0.49609375, "entropy/mean": 0.337890625, "entropy/min": 0.2001953125, "epoch": 0.054, "grad_norm": 1.3118260485938609, "kl": 0.032470703125, "learning_rate": 1.99026806874157e-06, "loss": 0.00032804696820676327, "memory(GiB)": 145.75, "reward": 1.6673247814178467, "reward_std": 0.34364157915115356, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20881755650043488, "rewards/EvidenceHallucination/std": 0.2592983841896057, "rewards/Evidence_Num_Record/mean": 2.6190476417541504, "rewards/Evidence_Num_Record/std": 1.0809296369552612, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4922279119491577, "rewards/VideoAccuracy/std": 0.4640647768974304, "step": 54, "train_speed(iter/s)": 0.017802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/mean_length": 380.3095397949219, "completions/min_length": 237.0, "entropy/max": 0.953125, "entropy/mean": 0.49609375, "entropy/min": 0.17578125, "epoch": 0.055, "grad_norm": 1.1253343964624638, "kl": 0.035400390625, "learning_rate": 1.9898214418809326e-06, "loss": 0.00035857115290127695, "memory(GiB)": 145.75, "reward": 1.5038208961486816, "reward_std": 0.19950318336486816, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24851542711257935, "rewards/EvidenceHallucination/std": 0.3577192723751068, "rewards/Evidence_Num_Record/mean": 2.4285714626312256, "rewards/Evidence_Num_Record/std": 0.5008703470230103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.38745105266571045, "rewards/VideoAccuracy/std": 0.5098853707313538, "step": 55, "train_speed(iter/s)": 0.017879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.0, "completions/mean_length": 303.4047546386719, "completions/min_length": 223.0, "entropy/max": 1.0390625, "entropy/mean": 0.39453125, "entropy/min": 0.1943359375, "epoch": 0.056, "grad_norm": 1.341577119659631, "kl": 0.05078125, "learning_rate": 1.9893648475293647e-06, "loss": 0.0005097612738609314, "memory(GiB)": 145.75, "reward": 1.6940311193466187, "reward_std": 0.35864514112472534, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3749173879623413, "rewards/EvidenceHallucination/std": 0.32496383786201477, "rewards/Evidence_Num_Record/mean": 2.3333334922790527, "rewards/Evidence_Num_Record/std": 0.6115421056747437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6190476417541504, "rewards/VideoAccuracy/std": 0.4915074408054352, "step": 56, "train_speed(iter/s)": 0.017889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 451.0, "completions/mean_length": 304.9761962890625, "completions/min_length": 192.0, "entropy/max": 0.515625, "entropy/mean": 0.341796875, "entropy/min": 0.201171875, "epoch": 0.057, "grad_norm": 1.2402957624594078, "kl": 0.0419921875, "learning_rate": 1.9888982902847653e-06, "loss": 0.0004201154224574566, "memory(GiB)": 145.75, "reward": 1.3021140098571777, "reward_std": 0.13262939453125, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.10855767130851746, "rewards/EvidenceHallucination/std": 0.21811561286449432, "rewards/Evidence_Num_Record/mean": 2.2142858505249023, "rewards/Evidence_Num_Record/std": 0.4703768193721771, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.21373586356639862, "rewards/VideoAccuracy/std": 0.39836785197257996, "step": 57, "train_speed(iter/s)": 0.01797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 729.0, "completions/mean_length": 451.3809509277344, "completions/min_length": 285.0, "entropy/max": 1.046875, "entropy/mean": 0.44140625, "entropy/min": 0.2080078125, "epoch": 0.058, "grad_norm": 1.1393164343744608, "kl": 0.033447265625, "learning_rate": 1.988421774845362e-06, "loss": 0.0003366103337612003, "memory(GiB)": 145.75, "reward": 1.8692452907562256, "reward_std": 0.24157139658927917, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3845154047012329, "rewards/EvidenceHallucination/std": 0.34343352913856506, "rewards/Evidence_Num_Record/mean": 2.7857143878936768, "rewards/Evidence_Num_Record/std": 0.7168942093849182, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6590089797973633, "rewards/VideoAccuracy/std": 0.4387028217315674, "step": 58, "train_speed(iter/s)": 0.017979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 487.0, "completions/mean_length": 327.5476379394531, "completions/min_length": 212.0, "entropy/max": 1.1015625, "entropy/mean": 0.48828125, "entropy/min": 0.181640625, "epoch": 0.059, "grad_norm": 1.3073129517153632, "kl": 0.0498046875, "learning_rate": 1.98793530600966e-06, "loss": 0.000497853965498507, "memory(GiB)": 145.75, "reward": 1.3389884233474731, "reward_std": 0.3814266622066498, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14732250571250916, "rewards/EvidenceHallucination/std": 0.236972376704216, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 0.45468270778656006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 59, "train_speed(iter/s)": 0.017988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/mean_length": 327.0952453613281, "completions/min_length": 240.0, "entropy/max": 1.078125, "entropy/mean": 0.47265625, "entropy/min": 0.2236328125, "epoch": 0.06, "grad_norm": 1.4469148169383166, "kl": 0.042236328125, "learning_rate": 1.987438888676394e-06, "loss": 0.00042862416012212634, "memory(GiB)": 145.75, "reward": 1.3566797971725464, "reward_std": 0.32148292660713196, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18699973821640015, "rewards/EvidenceHallucination/std": 0.300551176071167, "rewards/Evidence_Num_Record/mean": 2.1666667461395264, "rewards/Evidence_Num_Record/std": 0.4371005594730377, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.31927984952926636, "rewards/VideoAccuracy/std": 0.4577767848968506, "step": 60, "train_speed(iter/s)": 0.018037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 677.0, "completions/mean_length": 406.8333435058594, "completions/min_length": 238.0, "entropy/max": 0.48046875, "entropy/mean": 0.27734375, "entropy/min": 0.19921875, "epoch": 0.061, "grad_norm": 1.3020082787985705, "kl": 0.032958984375, "learning_rate": 1.986932527844482e-06, "loss": 0.0003307433507870883, "memory(GiB)": 145.75, "reward": 1.9191844463348389, "reward_std": 0.3503304719924927, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35135334730148315, "rewards/EvidenceHallucination/std": 0.33822157979011536, "rewards/Evidence_Num_Record/mean": 2.642857074737549, "rewards/Evidence_Num_Record/std": 0.7593780755996704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.648913562297821, "rewards/VideoAccuracy/std": 0.41752249002456665, "step": 61, "train_speed(iter/s)": 0.018009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 453.0, "completions/mean_length": 331.8095397949219, "completions/min_length": 231.0, "entropy/max": 0.97265625, "entropy/mean": 0.50390625, "entropy/min": 0.189453125, "epoch": 0.062, "grad_norm": 1.3181632186820587, "kl": 0.053955078125, "learning_rate": 1.9864162286129716e-06, "loss": 0.0005426580901257694, "memory(GiB)": 145.75, "reward": 1.3668583631515503, "reward_std": 0.26346227526664734, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16762493550777435, "rewards/EvidenceHallucination/std": 0.2664393186569214, "rewards/Evidence_Num_Record/mean": 2.1666667461395264, "rewards/Evidence_Num_Record/std": 0.48973196744918823, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 62, "train_speed(iter/s)": 0.018081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 490.0, "completions/mean_length": 308.5476379394531, "completions/min_length": 222.0, "entropy/max": 0.63671875, "entropy/mean": 0.36328125, "entropy/min": 0.20703125, "epoch": 0.063, "grad_norm": 1.3916223637864948, "kl": 0.0546875, "learning_rate": 1.9858899961809902e-06, "loss": 0.0005506295128725469, "memory(GiB)": 145.75, "reward": 1.3270175457000732, "reward_std": 0.4147834777832031, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08936237543821335, "rewards/EvidenceHallucination/std": 0.17755059897899628, "rewards/Evidence_Num_Record/mean": 2.0238096714019775, "rewards/Evidence_Num_Record/std": 0.26942533254623413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.30914509296417236, "rewards/VideoAccuracy/std": 0.46733471751213074, "step": 63, "train_speed(iter/s)": 0.018122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 310.76190185546875, "completions/min_length": 232.0, "entropy/max": 0.392578125, "entropy/mean": 0.298828125, "entropy/min": 0.1513671875, "epoch": 0.064, "grad_norm": 1.096097392343389, "kl": 0.0517578125, "learning_rate": 1.985353835847693e-06, "loss": 0.0007183492416515946, "memory(GiB)": 145.75, "reward": 1.6550796031951904, "reward_std": 0.14945176243782043, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2174966037273407, "rewards/EvidenceHallucination/std": 0.27638477087020874, "rewards/Evidence_Num_Record/mean": 2.1666667461395264, "rewards/Evidence_Num_Record/std": 0.37719547748565674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.47824692726135254, "rewards/VideoAccuracy/std": 0.49298593401908875, "step": 64, "train_speed(iter/s)": 0.018253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 637.0, "completions/mean_length": 397.3571472167969, "completions/min_length": 282.0, "entropy/max": 0.87890625, "entropy/mean": 0.421875, "entropy/min": 0.125, "epoch": 0.065, "grad_norm": 1.5461053818811703, "kl": 0.0498046875, "learning_rate": 1.984807753012208e-06, "loss": 0.0005011714529246092, "memory(GiB)": 145.75, "reward": 1.7878233194351196, "reward_std": 0.37429529428482056, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31716659665107727, "rewards/EvidenceHallucination/std": 0.2837255001068115, "rewards/Evidence_Num_Record/mean": 2.595238208770752, "rewards/Evidence_Num_Record/std": 0.7669872045516968, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6577231287956238, "rewards/VideoAccuracy/std": 0.4496304392814636, "step": 65, "train_speed(iter/s)": 0.018271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 454.0, "completions/mean_length": 306.21429443359375, "completions/min_length": 237.0, "entropy/max": 1.0390625, "entropy/mean": 0.373046875, "entropy/min": 0.2177734375, "epoch": 0.066, "grad_norm": 1.5463420602194187, "kl": 0.060302734375, "learning_rate": 1.9842517531735837e-06, "loss": 0.0006045004702173173, "memory(GiB)": 145.75, "reward": 1.6040095090866089, "reward_std": 0.5427826642990112, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28338417410850525, "rewards/EvidenceHallucination/std": 0.29940205812454224, "rewards/Evidence_Num_Record/mean": 2.1190476417541504, "rewards/Evidence_Num_Record/std": 0.66999751329422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5473327040672302, "rewards/VideoAccuracy/std": 0.5034978985786438, "step": 66, "train_speed(iter/s)": 0.018263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 295.8095397949219, "completions/min_length": 224.0, "entropy/max": 0.625, "entropy/mean": 0.357421875, "entropy/min": 0.2080078125, "epoch": 0.067, "grad_norm": 1.391620079806127, "kl": 0.055419921875, "learning_rate": 1.983685841930732e-06, "loss": 0.0005547074833884835, "memory(GiB)": 145.75, "reward": 1.4512310028076172, "reward_std": 0.2663763165473938, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13462595641613007, "rewards/EvidenceHallucination/std": 0.22502124309539795, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.43108054995536804, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3576390743255615, "rewards/VideoAccuracy/std": 0.4690335988998413, "step": 67, "train_speed(iter/s)": 0.018329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 623.0, "completions/mean_length": 384.21429443359375, "completions/min_length": 227.0, "entropy/max": 0.77734375, "entropy/mean": 0.322265625, "entropy/min": 0.1748046875, "epoch": 0.068, "grad_norm": 1.3718544211406185, "kl": 0.042236328125, "learning_rate": 1.983110024982373e-06, "loss": 0.00042220597970299423, "memory(GiB)": 145.75, "reward": 1.8695082664489746, "reward_std": 0.3609388768672943, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.25149857997894287, "rewards/EvidenceHallucination/std": 0.26422175765037537, "rewards/Evidence_Num_Record/mean": 2.5, "rewards/Evidence_Num_Record/std": 0.5521576404571533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6858752369880676, "rewards/VideoAccuracy/std": 0.42166298627853394, "step": 68, "train_speed(iter/s)": 0.018371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 493.0, "completions/mean_length": 318.5, "completions/min_length": 224.0, "entropy/max": 0.97265625, "entropy/mean": 0.458984375, "entropy/min": 0.2412109375, "epoch": 0.069, "grad_norm": 1.693376453582021, "kl": 0.06494140625, "learning_rate": 1.982524308126977e-06, "loss": 0.0006529532838612795, "memory(GiB)": 145.75, "reward": 1.678009033203125, "reward_std": 0.360478013753891, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29480722546577454, "rewards/EvidenceHallucination/std": 0.28128427267074585, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.37020254135131836, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6190476417541504, "rewards/VideoAccuracy/std": 0.4915074408054352, "step": 69, "train_speed(iter/s)": 0.018359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 288.69049072265625, "completions/min_length": 156.0, "entropy/max": 0.703125, "entropy/mean": 0.341796875, "entropy/min": 0.19921875, "epoch": 0.07, "grad_norm": 1.5434923760121793, "kl": 0.061279296875, "learning_rate": 1.9819286972627067e-06, "loss": 0.0006184515659697354, "memory(GiB)": 145.75, "reward": 1.2753374576568604, "reward_std": 0.37550923228263855, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08139073103666306, "rewards/EvidenceHallucination/std": 0.19860804080963135, "rewards/Evidence_Num_Record/mean": 1.9523810148239136, "rewards/Evidence_Num_Record/std": 0.30860671401023865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2590593695640564, "rewards/VideoAccuracy/std": 0.41239312291145325, "step": 70, "train_speed(iter/s)": 0.018408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 758.0, "completions/mean_length": 444.0238037109375, "completions/min_length": 257.0, "entropy/max": 0.431640625, "entropy/mean": 0.279296875, "entropy/min": 0.142578125, "epoch": 0.071, "grad_norm": 0.9259762128106337, "kl": 0.044189453125, "learning_rate": 1.981323198387356e-06, "loss": 0.0008414517506025732, "memory(GiB)": 145.75, "reward": 1.4682765007019043, "reward_std": 0.16010618209838867, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.04678576812148094, "rewards/EvidenceHallucination/std": 0.15406948328018188, "rewards/Evidence_Num_Record/mean": 3.0, "rewards/Evidence_Num_Record/std": 1.0121216773986816, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.25891920924186707, "rewards/VideoAccuracy/std": 0.34589600563049316, "step": 71, "train_speed(iter/s)": 0.018451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.0, "completions/mean_length": 313.19049072265625, "completions/min_length": 224.0, "entropy/max": 0.8671875, "entropy/mean": 0.44140625, "entropy/min": 0.2490234375, "epoch": 0.072, "grad_norm": 1.370825290226045, "kl": 0.06298828125, "learning_rate": 1.9807078175982922e-06, "loss": 0.0006318792584352195, "memory(GiB)": 145.75, "reward": 1.468855857849121, "reward_std": 0.4081823527812958, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.14189793169498444, "rewards/EvidenceHallucination/std": 0.1861451119184494, "rewards/Evidence_Num_Record/mean": 2.0, "rewards/Evidence_Num_Record/std": 0.44172608852386475, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4523809552192688, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 72, "train_speed(iter/s)": 0.018458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.0, "completions/mean_length": 308.21429443359375, "completions/min_length": 242.0, "entropy/max": 0.65625, "entropy/mean": 0.376953125, "entropy/min": 0.197265625, "epoch": 0.073, "grad_norm": 1.5900604917883046, "kl": 0.06787109375, "learning_rate": 1.980082561092393e-06, "loss": 0.0006817152607254684, "memory(GiB)": 145.75, "reward": 1.5464859008789062, "reward_std": 0.3614649176597595, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22563165426254272, "rewards/EvidenceHallucination/std": 0.25571611523628235, "rewards/Evidence_Num_Record/mean": 2.238095283508301, "rewards/Evidence_Num_Record/std": 0.43108054995536804, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5013597011566162, "rewards/VideoAccuracy/std": 0.501571536064148, "step": 73, "train_speed(iter/s)": 0.018492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.0, "completions/mean_length": 319.3571472167969, "completions/min_length": 214.0, "entropy/max": 0.671875, "entropy/mean": 0.3515625, "entropy/min": 0.2373046875, "epoch": 0.074, "grad_norm": 1.3190473186336864, "kl": 0.06787109375, "learning_rate": 1.9794474351659853e-06, "loss": 0.0010814255801960826, "memory(GiB)": 145.75, "reward": 1.4756110906600952, "reward_std": 0.21122239530086517, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430335700511932, "rewards/EvidenceHallucination/mean": 0.16264285147190094, "rewards/EvidenceHallucination/std": 0.27389493584632874, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.5763435363769531, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3216537833213806, "rewards/VideoAccuracy/std": 0.42325273156166077, "step": 74, "train_speed(iter/s)": 0.018551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 788.0, "completions/mean_length": 394.26190185546875, "completions/min_length": 182.0, "entropy/max": 0.9609375, "entropy/mean": 0.40625, "entropy/min": 0.1435546875, "epoch": 0.075, "grad_norm": 0.8582498090559345, "kl": 0.057373046875, "learning_rate": 1.978802446214779e-06, "loss": 0.0005849922308698297, "memory(GiB)": 145.75, "reward": 1.2947375774383545, "reward_std": 0.20632633566856384, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.08700108528137207, "rewards/EvidenceHallucination/std": 0.2174977958202362, "rewards/Evidence_Num_Record/mean": 2.8809523582458496, "rewards/Evidence_Num_Record/std": 1.253334641456604, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.2273373007774353, "rewards/VideoAccuracy/std": 0.3822914659976959, "step": 75, "train_speed(iter/s)": 0.018504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 274.6428527832031, "completions/min_length": 197.0, "entropy/max": 0.5, "entropy/mean": 0.359375, "entropy/min": 0.26953125, "epoch": 0.076, "grad_norm": 1.5982752636011046, "kl": 0.08740234375, "learning_rate": 1.9781476007338054e-06, "loss": 0.0008751722052693367, "memory(GiB)": 145.75, "reward": 1.9944734573364258, "reward_std": 0.2277480810880661, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45637980103492737, "rewards/EvidenceHallucination/std": 0.28484517335891724, "rewards/Evidence_Num_Record/mean": 2.047619104385376, "rewards/Evidence_Num_Record/std": 0.30860671401023865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9031975269317627, "rewards/VideoAccuracy/std": 0.2695874869823456, "step": 76, "train_speed(iter/s)": 0.018539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.0, "completions/mean_length": 283.5476379394531, "completions/min_length": 214.0, "entropy/max": 0.578125, "entropy/mean": 0.37109375, "entropy/min": 0.251953125, "epoch": 0.077, "grad_norm": 1.6176824224306654, "kl": 0.07666015625, "learning_rate": 1.9774829053173526e-06, "loss": 0.0007775009144097567, "memory(GiB)": 145.75, "reward": 1.4460300207138062, "reward_std": 0.2849999666213989, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.15240485966205597, "rewards/EvidenceHallucination/std": 0.23497402667999268, "rewards/Evidence_Num_Record/mean": 1.9523810148239136, "rewards/Evidence_Num_Record/std": 0.4915074110031128, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3607870936393738, "rewards/VideoAccuracy/std": 0.4723948538303375, "step": 77, "train_speed(iter/s)": 0.018647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/mean_length": 399.4285888671875, "completions/min_length": 225.0, "entropy/max": 1.2109375, "entropy/mean": 0.40234375, "entropy/min": 0.1826171875, "epoch": 0.078, "grad_norm": 1.442821113130145, "kl": 0.052490234375, "learning_rate": 1.976808366658895e-06, "loss": 0.0005323234363459051, "memory(GiB)": 145.75, "reward": 1.9828182458877563, "reward_std": 0.2595849633216858, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3858642578125, "rewards/EvidenceHallucination/std": 0.3123042583465576, "rewards/Evidence_Num_Record/mean": 2.7857143878936768, "rewards/Evidence_Num_Record/std": 0.7501451373100281, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7723119854927063, "rewards/VideoAccuracy/std": 0.389898419380188, "step": 78, "train_speed(iter/s)": 0.018659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 268.0952453613281, "completions/min_length": 203.0, "entropy/max": 0.61328125, "entropy/mean": 0.3828125, "entropy/min": 0.1904296875, "epoch": 0.079, "grad_norm": 1.864548819936298, "kl": 0.09423828125, "learning_rate": 1.97612399155103e-06, "loss": 0.0009492395329289138, "memory(GiB)": 145.75, "reward": 1.645930528640747, "reward_std": 0.5275153517723083, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.31298649311065674, "rewards/EvidenceHallucination/std": 0.32048505544662476, "rewards/Evidence_Num_Record/mean": 1.9523810148239136, "rewards/Evidence_Num_Record/std": 0.37949779629707336, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 79, "train_speed(iter/s)": 0.018674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/mean_length": 273.0476379394531, "completions/min_length": 162.0, "entropy/max": 0.55078125, "entropy/mean": 0.34375, "entropy/min": 0.173828125, "epoch": 0.08, "grad_norm": 1.5542895075602492, "kl": 0.08349609375, "learning_rate": 1.975429786885407e-06, "loss": 0.0008524173754267395, "memory(GiB)": 145.75, "reward": 1.2154887914657593, "reward_std": 0.21944718062877655, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.1446523219347, "rewards/EvidenceHallucination/std": 0.29942378401756287, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.8207529187202454, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.19846321642398834, "rewards/VideoAccuracy/std": 0.37335455417633057, "step": 80, "train_speed(iter/s)": 0.018726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 847.0, "completions/mean_length": 450.452392578125, "completions/min_length": 196.0, "entropy/max": 0.53515625, "entropy/mean": 0.328125, "entropy/min": 0.1787109375, "epoch": 0.081, "grad_norm": 1.3445666849751166, "kl": 0.0595703125, "learning_rate": 1.974725759652659e-06, "loss": 0.0005987854674458504, "memory(GiB)": 145.75, "reward": 1.987047553062439, "reward_std": 0.19900517165660858, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31228476762771606, "rewards/EvidenceHallucination/std": 0.2700451910495758, "rewards/Evidence_Num_Record/mean": 3.095238208770752, "rewards/Evidence_Num_Record/std": 0.9578819274902344, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7245904803276062, "rewards/VideoAccuracy/std": 0.3621458113193512, "step": 81, "train_speed(iter/s)": 0.018757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 297.6190490722656, "completions/min_length": 184.0, "entropy/max": 1.5234375, "entropy/mean": 0.51953125, "entropy/min": 0.314453125, "epoch": 0.082, "grad_norm": 1.565173013639308, "kl": 0.10791015625, "learning_rate": 1.9740119169423336e-06, "loss": 0.0010965773835778236, "memory(GiB)": 145.75, "reward": 1.8209408521652222, "reward_std": 0.24913637340068817, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41422826051712036, "rewards/EvidenceHallucination/std": 0.31709954142570496, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.37020260095596313, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.738095223903656, "rewards/VideoAccuracy/std": 0.44500061869621277, "step": 82, "train_speed(iter/s)": 0.018742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.0, "completions/mean_length": 273.5714416503906, "completions/min_length": 144.0, "entropy/max": 0.5, "entropy/mean": 0.365234375, "entropy/min": 0.23828125, "epoch": 0.083, "grad_norm": 1.9360121628140547, "kl": 0.1171875, "learning_rate": 1.9732882659428175e-06, "loss": 0.0011830523144453764, "memory(GiB)": 145.75, "reward": 1.260207176208496, "reward_std": 0.36909279227256775, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.18188481032848358, "rewards/EvidenceHallucination/std": 0.33221206068992615, "rewards/Evidence_Num_Record/mean": 2.047619104385376, "rewards/Evidence_Num_Record/std": 0.4915074408054352, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2357349395751953, "rewards/VideoAccuracy/std": 0.3935372829437256, "step": 83, "train_speed(iter/s)": 0.018691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 304.952392578125, "completions/min_length": 177.0, "entropy/max": 0.435546875, "entropy/mean": 0.32421875, "entropy/min": 0.19921875, "epoch": 0.084, "grad_norm": 1.359891083571745, "kl": 0.09423828125, "learning_rate": 1.972554813941269e-06, "loss": 0.0011465921998023987, "memory(GiB)": 145.75, "reward": 1.6613414287567139, "reward_std": 0.07137398421764374, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.3027176856994629, "rewards/EvidenceHallucination/std": 0.3304864466190338, "rewards/Evidence_Num_Record/mean": 2.1190476417541504, "rewards/Evidence_Num_Record/std": 0.503760576248169, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.47936904430389404, "rewards/VideoAccuracy/std": 0.4919584095478058, "step": 84, "train_speed(iter/s)": 0.018785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 644.0, "completions/mean_length": 382.6190490722656, "completions/min_length": 220.0, "entropy/max": 1.1015625, "entropy/mean": 0.421875, "entropy/min": 0.19140625, "epoch": 0.085, "grad_norm": 1.5264792017573605, "kl": 0.09130859375, "learning_rate": 1.9718115683235415e-06, "loss": 0.0009243786334991455, "memory(GiB)": 145.75, "reward": 1.7950348854064941, "reward_std": 0.40462440252304077, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.327504962682724, "rewards/EvidenceHallucination/std": 0.3095806837081909, "rewards/Evidence_Num_Record/mean": 2.6666667461395264, "rewards/Evidence_Num_Record/std": 1.0040568113327026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6628671288490295, "rewards/VideoAccuracy/std": 0.44367995858192444, "step": 85, "train_speed(iter/s)": 0.018814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 404.0, "completions/mean_length": 280.73809814453125, "completions/min_length": 217.0, "entropy/max": 0.69921875, "entropy/mean": 0.38671875, "entropy/min": 0.2216796875, "epoch": 0.086, "grad_norm": 1.3721674291805483, "kl": 0.1298828125, "learning_rate": 1.97105853657411e-06, "loss": 0.0013156002387404442, "memory(GiB)": 145.75, "reward": 1.3735020160675049, "reward_std": 0.39117431640625, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20084336400032043, "rewards/EvidenceHallucination/std": 0.2974098324775696, "rewards/Evidence_Num_Record/mean": 2.047619104385376, "rewards/Evidence_Num_Record/std": 0.37949779629707336, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 86, "train_speed(iter/s)": 0.018832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.0, "completions/mean_length": 278.1190490722656, "completions/min_length": 182.0, "entropy/max": 0.75390625, "entropy/mean": 0.341796875, "entropy/min": 0.1787109375, "epoch": 0.087, "grad_norm": 1.4565366657438574, "kl": 0.1318359375, "learning_rate": 1.9702957262759963e-06, "loss": 0.0013202999252825975, "memory(GiB)": 145.75, "reward": 1.636621356010437, "reward_std": 0.2807752788066864, "rewards/EvidenceFormat/mean": 0.9523809552192688, "rewards/EvidenceFormat/std": 0.21554027497768402, "rewards/EvidenceHallucination/mean": 0.3674306571483612, "rewards/EvidenceHallucination/std": 0.35236856341362, "rewards/Evidence_Num_Record/mean": 1.8809523582458496, "rewards/Evidence_Num_Record/std": 0.45276281237602234, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5202780961990356, "rewards/VideoAccuracy/std": 0.48144081234931946, "step": 87, "train_speed(iter/s)": 0.018877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 780.0, "completions/mean_length": 430.0952453613281, "completions/min_length": 241.0, "entropy/max": 0.64453125, "entropy/mean": 0.365234375, "entropy/min": 0.177734375, "epoch": 0.088, "grad_norm": 1.4062133131420138, "kl": 0.080078125, "learning_rate": 1.969523145110691e-06, "loss": 0.0008045134018175304, "memory(GiB)": 145.75, "reward": 1.762561321258545, "reward_std": 0.39015036821365356, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2917018532752991, "rewards/EvidenceHallucination/std": 0.3407047688961029, "rewards/Evidence_Num_Record/mean": 2.8809523582458496, "rewards/Evidence_Num_Record/std": 0.8890219330787659, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5708876848220825, "rewards/VideoAccuracy/std": 0.46675702929496765, "step": 88, "train_speed(iter/s)": 0.018891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 418.0, "completions/mean_length": 283.6190490722656, "completions/min_length": 219.0, "entropy/max": 0.703125, "entropy/mean": 0.4296875, "entropy/min": 0.26953125, "epoch": 0.089, "grad_norm": 1.6626483004876464, "kl": 0.1396484375, "learning_rate": 1.9687408008580783e-06, "loss": 0.0014032538747414947, "memory(GiB)": 145.75, "reward": 1.3591610193252563, "reward_std": 0.44488558173179626, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18866242468357086, "rewards/EvidenceHallucination/std": 0.30501607060432434, "rewards/Evidence_Num_Record/mean": 2.0714285373687744, "rewards/Evidence_Num_Record/std": 0.26066118478775024, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 89, "train_speed(iter/s)": 0.018762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 720.0, "completions/mean_length": 286.1190490722656, "completions/min_length": 183.0, "entropy/max": 1.4296875, "entropy/mean": 0.4921875, "entropy/min": 0.255859375, "epoch": 0.09, "grad_norm": 1.3142514728000814, "kl": 0.130859375, "learning_rate": 1.967948701396356e-06, "loss": 0.0013334897812455893, "memory(GiB)": 145.75, "reward": 1.1134485006332397, "reward_std": 0.21092890202999115, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.05557645112276077, "rewards/EvidenceHallucination/std": 0.17599347233772278, "rewards/Evidence_Num_Record/mean": 1.8333333730697632, "rewards/Evidence_Num_Record/std": 0.5372316837310791, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.11423792690038681, "rewards/VideoAccuracy/std": 0.3155045509338379, "step": 90, "train_speed(iter/s)": 0.018816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 679.0, "completions/mean_length": 405.3571472167969, "completions/min_length": 206.0, "entropy/max": 0.482421875, "entropy/mean": 0.298828125, "entropy/min": 0.1396484375, "epoch": 0.091, "grad_norm": 1.3160893508945921, "kl": 0.07958984375, "learning_rate": 1.967146854701957e-06, "loss": 0.0008046379080042243, "memory(GiB)": 145.75, "reward": 2.143765449523926, "reward_std": 0.13687437772750854, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4880090057849884, "rewards/EvidenceHallucination/std": 0.30523109436035156, "rewards/Evidence_Num_Record/mean": 2.8333334922790527, "rewards/Evidence_Num_Record/std": 0.8811485767364502, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8461634516716003, "rewards/VideoAccuracy/std": 0.3223564326763153, "step": 91, "train_speed(iter/s)": 0.018827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 289.0476379394531, "completions/min_length": 206.0, "entropy/max": 0.7890625, "entropy/mean": 0.427734375, "entropy/min": 0.21875, "epoch": 0.092, "grad_norm": 1.5615526688327124, "kl": 0.12890625, "learning_rate": 1.9663352688494683e-06, "loss": 0.001303645665757358, "memory(GiB)": 145.75, "reward": 1.6100871562957764, "reward_std": 0.3530694842338562, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3123398423194885, "rewards/EvidenceHallucination/std": 0.3104865550994873, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.37020257115364075, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5476190447807312, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 92, "train_speed(iter/s)": 0.018856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/mean_length": 329.0476379394531, "completions/min_length": 223.0, "entropy/max": 1.28125, "entropy/mean": 0.453125, "entropy/min": 0.1181640625, "epoch": 0.093, "grad_norm": 1.4876802809944685, "kl": 0.1171875, "learning_rate": 1.965513952011551e-06, "loss": 0.0012173369759693742, "memory(GiB)": 145.75, "reward": 1.6506892442703247, "reward_std": 0.40816056728363037, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32043275237083435, "rewards/EvidenceHallucination/std": 0.28490114212036133, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 1.0873574018478394, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5866028070449829, "rewards/VideoAccuracy/std": 0.4674220085144043, "step": 93, "train_speed(iter/s)": 0.018846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 546.0, "completions/mean_length": 328.9761962890625, "completions/min_length": 244.0, "entropy/max": 0.65625, "entropy/mean": 0.361328125, "entropy/min": 0.18359375, "epoch": 0.094, "grad_norm": 1.4128504167741638, "kl": 0.1259765625, "learning_rate": 1.964682912458856e-06, "loss": 0.0012843573931604624, "memory(GiB)": 145.75, "reward": 1.8426321744918823, "reward_std": 0.0740402340888977, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24410170316696167, "rewards/EvidenceHallucination/std": 0.3090246617794037, "rewards/Evidence_Num_Record/mean": 2.404762029647827, "rewards/Evidence_Num_Record/std": 0.7669872641563416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.660478413105011, "rewards/VideoAccuracy/std": 0.47979769110679626, "step": 94, "train_speed(iter/s)": 0.018934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 901.0, "completions/mean_length": 404.66668701171875, "completions/min_length": 236.0, "entropy/max": 0.921875, "entropy/mean": 0.431640625, "entropy/min": 0.181640625, "epoch": 0.095, "grad_norm": 1.3466708994180563, "kl": 0.09619140625, "learning_rate": 1.963842158559942e-06, "loss": 0.0009734997292980552, "memory(GiB)": 145.75, "reward": 1.4714419841766357, "reward_std": 0.33735716342926025, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16494199633598328, "rewards/EvidenceHallucination/std": 0.2535810172557831, "rewards/Evidence_Num_Record/mean": 2.7857143878936768, "rewards/Evidence_Num_Record/std": 0.8981204032897949, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3717869222164154, "rewards/VideoAccuracy/std": 0.4193684756755829, "step": 95, "train_speed(iter/s)": 0.018945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/mean_length": 280.9761962890625, "completions/min_length": 216.0, "entropy/max": 1.234375, "entropy/mean": 0.421875, "entropy/min": 0.2412109375, "epoch": 0.096, "grad_norm": 1.5754236514272237, "kl": 0.1376953125, "learning_rate": 1.9629916987811925e-06, "loss": 0.0013955392641946673, "memory(GiB)": 145.75, "reward": 1.781163215637207, "reward_std": 0.31508854031562805, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46638545393943787, "rewards/EvidenceHallucination/std": 0.3576310873031616, "rewards/Evidence_Num_Record/mean": 2.095238208770752, "rewards/Evidence_Num_Record/std": 0.2971017360687256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6878859400749207, "rewards/VideoAccuracy/std": 0.44374170899391174, "step": 96, "train_speed(iter/s)": 0.01895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 357.0, "completions/mean_length": 293.8571472167969, "completions/min_length": 232.0, "entropy/max": 0.4609375, "entropy/mean": 0.353515625, "entropy/min": 0.2265625, "epoch": 0.097, "grad_norm": 1.4320296713190575, "kl": 0.1318359375, "learning_rate": 1.962131541686727e-06, "loss": 0.0013267816975712776, "memory(GiB)": 145.75, "reward": 1.6482120752334595, "reward_std": 0.2762901782989502, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30244576930999756, "rewards/EvidenceHallucination/std": 0.36234188079833984, "rewards/Evidence_Num_Record/mean": 2.0, "rewards/Evidence_Num_Record/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5210562348365784, "rewards/VideoAccuracy/std": 0.5029321312904358, "step": 97, "train_speed(iter/s)": 0.018976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/mean_length": 371.5, "completions/min_length": 233.0, "entropy/max": 0.8515625, "entropy/mean": 0.380859375, "entropy/min": 0.197265625, "epoch": 0.098, "grad_norm": 1.1665879934566166, "kl": 0.08154296875, "learning_rate": 1.9612616959383188e-06, "loss": 0.0008269266108982265, "memory(GiB)": 145.75, "reward": 1.9281727075576782, "reward_std": 0.2275279015302658, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4922899603843689, "rewards/EvidenceHallucination/std": 0.3693905770778656, "rewards/Evidence_Num_Record/mean": 2.5238096714019775, "rewards/Evidence_Num_Record/std": 0.551631510257721, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6963813304901123, "rewards/VideoAccuracy/std": 0.40303418040275574, "step": 98, "train_speed(iter/s)": 0.018996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.0, "completions/mean_length": 289.1190490722656, "completions/min_length": 225.0, "entropy/max": 0.953125, "entropy/mean": 0.486328125, "entropy/min": 0.21484375, "epoch": 0.099, "grad_norm": 1.6602149895534124, "kl": 0.1357421875, "learning_rate": 1.9603821702953047e-06, "loss": 0.0013623833656311035, "memory(GiB)": 145.75, "reward": 1.7348952293395996, "reward_std": 0.3678227365016937, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3411424458026886, "rewards/EvidenceHallucination/std": 0.30779772996902466, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 0.5054867267608643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 99, "train_speed(iter/s)": 0.018977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 317.0952453613281, "completions/min_length": 182.0, "entropy/max": 0.61328125, "entropy/mean": 0.373046875, "entropy/min": 0.1357421875, "epoch": 0.1, "grad_norm": 1.532235925960191, "kl": 0.134765625, "learning_rate": 1.9594929736144973e-06, "loss": 0.00138301239348948, "memory(GiB)": 145.75, "reward": 1.3657147884368896, "reward_std": 0.398904412984848, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1937606930732727, "rewards/EvidenceHallucination/std": 0.31757616996765137, "rewards/Evidence_Num_Record/mean": 2.0238096714019775, "rewards/Evidence_Num_Record/std": 0.26942533254623413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3269627094268799, "rewards/VideoAccuracy/std": 0.4119543433189392, "step": 100, "train_speed(iter/s)": 0.018944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/mean_length": 395.66668701171875, "completions/min_length": 231.0, "entropy/max": 0.734375, "entropy/mean": 0.31640625, "entropy/min": 0.1669921875, "epoch": 0.101, "grad_norm": 1.2899605030948045, "kl": 0.083984375, "learning_rate": 1.9585941148500986e-06, "loss": 0.0008435548515990376, "memory(GiB)": 145.75, "reward": 1.9611115455627441, "reward_std": 0.31510135531425476, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.401968777179718, "rewards/EvidenceHallucination/std": 0.311238169670105, "rewards/Evidence_Num_Record/mean": 2.690476179122925, "rewards/Evidence_Num_Record/std": 0.7152722477912903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6807177662849426, "rewards/VideoAccuracy/std": 0.40623319149017334, "step": 101, "train_speed(iter/s)": 0.018779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/mean_length": 284.8571472167969, "completions/min_length": 183.0, "entropy/max": 0.96875, "entropy/mean": 0.447265625, "entropy/min": 0.2392578125, "epoch": 0.102, "grad_norm": 1.6057589227526903, "kl": 0.1298828125, "learning_rate": 1.957685603053605e-06, "loss": 0.0013163024559617043, "memory(GiB)": 145.75, "reward": 1.7291897535324097, "reward_std": 0.3314557671546936, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.312615305185318, "rewards/EvidenceHallucination/std": 0.27923011779785156, "rewards/Evidence_Num_Record/mean": 2.0714285373687744, "rewards/Evidence_Num_Record/std": 0.34165000915527344, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 102, "train_speed(iter/s)": 0.018807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.0, "completions/mean_length": 282.8571472167969, "completions/min_length": 195.0, "entropy/max": 0.76171875, "entropy/mean": 0.373046875, "entropy/min": 0.25390625, "epoch": 0.103, "grad_norm": 1.5676941891920446, "kl": 0.1455078125, "learning_rate": 1.9567674473737218e-06, "loss": 0.0014565930468961596, "memory(GiB)": 145.75, "reward": 1.2863401174545288, "reward_std": 0.42372652888298035, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.12458339333534241, "rewards/EvidenceHallucination/std": 0.2610587179660797, "rewards/Evidence_Num_Record/mean": 2.1666667461395264, "rewards/Evidence_Num_Record/std": 0.37719547748565674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.26142337918281555, "rewards/VideoAccuracy/std": 0.4441832900047302, "step": 103, "train_speed(iter/s)": 0.018817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 856.0, "completions/mean_length": 360.452392578125, "completions/min_length": 208.0, "entropy/max": 0.7578125, "entropy/mean": 0.37109375, "entropy/min": 0.203125, "epoch": 0.104, "grad_norm": 1.270247282735159, "kl": 0.130859375, "learning_rate": 1.955839657056265e-06, "loss": 0.0015169496182352304, "memory(GiB)": 145.75, "reward": 1.7464238405227661, "reward_std": 0.2219785451889038, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2563296854496002, "rewards/EvidenceHallucination/std": 0.32491403818130493, "rewards/Evidence_Num_Record/mean": 2.642857074737549, "rewards/Evidence_Num_Record/std": 1.122310996055603, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5618245601654053, "rewards/VideoAccuracy/std": 0.45603543519973755, "step": 104, "train_speed(iter/s)": 0.018801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 670.0, "completions/mean_length": 392.5714416503906, "completions/min_length": 229.0, "entropy/max": 1.203125, "entropy/mean": 0.408203125, "entropy/min": 0.1689453125, "epoch": 0.105, "grad_norm": 1.5373718970893957, "kl": 0.109375, "learning_rate": 1.9549022414440736e-06, "loss": 0.001107646618038416, "memory(GiB)": 145.75, "reward": 1.7364230155944824, "reward_std": 0.3530835807323456, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3004482090473175, "rewards/EvidenceHallucination/std": 0.33540382981300354, "rewards/Evidence_Num_Record/mean": 2.7857143878936768, "rewards/Evidence_Num_Record/std": 0.7501451373100281, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6096665859222412, "rewards/VideoAccuracy/std": 0.4684169292449951, "step": 105, "train_speed(iter/s)": 0.018819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.0, "completions/mean_length": 297.8333435058594, "completions/min_length": 167.0, "entropy/max": 0.482421875, "entropy/mean": 0.375, "entropy/min": 0.2197265625, "epoch": 0.106, "grad_norm": 1.3914419307345625, "kl": 0.1416015625, "learning_rate": 1.9539552099769126e-06, "loss": 0.0014185493346303701, "memory(GiB)": 145.75, "reward": 1.5386505126953125, "reward_std": 0.34179314970970154, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31230056285858154, "rewards/EvidenceHallucination/std": 0.3736018240451813, "rewards/Evidence_Num_Record/mean": 2.2142858505249023, "rewards/Evidence_Num_Record/std": 0.4152997136116028, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4761904776096344, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 106, "train_speed(iter/s)": 0.018817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 469.0, "completions/mean_length": 300.9285888671875, "completions/min_length": 155.0, "entropy/max": 0.609375, "entropy/mean": 0.39453125, "entropy/min": 0.15625, "epoch": 0.107, "grad_norm": 1.4888784408936901, "kl": 0.1513671875, "learning_rate": 1.952998572191378e-06, "loss": 0.0015213524457067251, "memory(GiB)": 145.75, "reward": 1.6890357732772827, "reward_std": 0.32763195037841797, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3215029239654541, "rewards/EvidenceHallucination/std": 0.32102346420288086, "rewards/Evidence_Num_Record/mean": 2.142857074737549, "rewards/Evidence_Num_Record/std": 0.41739192605018616, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5580684542655945, "rewards/VideoAccuracy/std": 0.4772607684135437, "step": 107, "train_speed(iter/s)": 0.018838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 745.0, "completions/mean_length": 475.21429443359375, "completions/min_length": 269.0, "entropy/max": 1.15625, "entropy/mean": 0.38671875, "entropy/min": 0.162109375, "epoch": 0.108, "grad_norm": 1.267622073447083, "kl": 0.07763671875, "learning_rate": 1.9520323377208013e-06, "loss": 0.000781947048380971, "memory(GiB)": 145.75, "reward": 1.8262310028076172, "reward_std": 0.3916190266609192, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30679747462272644, "rewards/EvidenceHallucination/std": 0.284414678812027, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6315380334854126, "rewards/VideoAccuracy/std": 0.4059906601905823, "step": 108, "train_speed(iter/s)": 0.018845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 434.0, "completions/mean_length": 310.73809814453125, "completions/min_length": 185.0, "entropy/max": 1.1953125, "entropy/mean": 0.51171875, "entropy/min": 0.25390625, "epoch": 0.109, "grad_norm": 1.6471346855993556, "kl": 0.1357421875, "learning_rate": 1.9510565162951534e-06, "loss": 0.0013572005555033684, "memory(GiB)": 146.12, "reward": 1.835160255432129, "reward_std": 0.19685718417167664, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3662773370742798, "rewards/EvidenceHallucination/std": 0.27799177169799805, "rewards/Evidence_Num_Record/mean": 2.2857143878936768, "rewards/Evidence_Num_Record/std": 0.5077791810035706, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.761904776096344, "rewards/VideoAccuracy/std": 0.43108054995536804, "step": 109, "train_speed(iter/s)": 0.018797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.0, "completions/mean_length": 288.5476379394531, "completions/min_length": 199.0, "entropy/max": 0.51171875, "entropy/mean": 0.373046875, "entropy/min": 0.2373046875, "epoch": 0.11, "grad_norm": 1.2249208193246408, "kl": 0.142578125, "learning_rate": 1.9500711177409454e-06, "loss": 0.0014356218744069338, "memory(GiB)": 146.12, "reward": 1.105482578277588, "reward_std": 0.1584109514951706, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.020045572891831398, "rewards/EvidenceHallucination/std": 0.09083432704210281, "rewards/Evidence_Num_Record/mean": 2.0238096714019775, "rewards/Evidence_Num_Record/std": 0.4679011404514313, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.10147348046302795, "rewards/VideoAccuracy/std": 0.26096680760383606, "step": 110, "train_speed(iter/s)": 0.01883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 684.0, "completions/mean_length": 402.23809814453125, "completions/min_length": 234.0, "entropy/max": 0.458984375, "entropy/mean": 0.28125, "entropy/min": 0.146484375, "epoch": 0.111, "grad_norm": 1.1488735921623328, "kl": 0.09912109375, "learning_rate": 1.9490761519811294e-06, "loss": 0.001193409669212997, "memory(GiB)": 146.12, "reward": 1.8218879699707031, "reward_std": 0.2910378575325012, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24762538075447083, "rewards/EvidenceHallucination/std": 0.36527809500694275, "rewards/Evidence_Num_Record/mean": 2.5714285373687744, "rewards/Evidence_Num_Record/std": 0.5008703470230103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5723628997802734, "rewards/VideoAccuracy/std": 0.46409302949905396, "step": 111, "train_speed(iter/s)": 0.018845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 438.0, "completions/mean_length": 303.0476379394531, "completions/min_length": 195.0, "entropy/max": 1.2890625, "entropy/mean": 0.54296875, "entropy/min": 0.259765625, "epoch": 0.112, "grad_norm": 1.5008335928014824, "kl": 0.1494140625, "learning_rate": 1.9480716290349993e-06, "loss": 0.0015141356270760298, "memory(GiB)": 146.12, "reward": 1.5550602674484253, "reward_std": 0.2568727731704712, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2753015160560608, "rewards/EvidenceHallucination/std": 0.3077669143676758, "rewards/Evidence_Num_Record/mean": 2.238095283508301, "rewards/Evidence_Num_Record/std": 0.5763435959815979, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5, "rewards/VideoAccuracy/std": 0.5060608386993408, "step": 112, "train_speed(iter/s)": 0.018847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.0, "completions/mean_length": 314.3095397949219, "completions/min_length": 164.0, "entropy/max": 1.4921875, "entropy/mean": 0.458984375, "entropy/min": 0.2412109375, "epoch": 0.113, "grad_norm": 1.5778138722270425, "kl": 0.140625, "learning_rate": 1.9470575590180908e-06, "loss": 0.0014103710418567061, "memory(GiB)": 146.12, "reward": 1.460694432258606, "reward_std": 0.29496198892593384, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21751172840595245, "rewards/EvidenceHallucination/std": 0.32539689540863037, "rewards/Evidence_Num_Record/mean": 2.261904716491699, "rewards/Evidence_Num_Record/std": 0.5868279337882996, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4171919524669647, "rewards/VideoAccuracy/std": 0.48727452754974365, "step": 113, "train_speed(iter/s)": 0.018868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 963.0, "completions/mean_length": 375.3333435058594, "completions/min_length": 212.0, "entropy/max": 0.53515625, "entropy/mean": 0.353515625, "entropy/min": 0.1650390625, "epoch": 0.114, "grad_norm": 1.3875680698005661, "kl": 0.1337890625, "learning_rate": 1.946033952142077e-06, "loss": 0.0015402303542941809, "memory(GiB)": 146.12, "reward": 1.5177232027053833, "reward_std": 0.12473516166210175, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18247689306735992, "rewards/EvidenceHallucination/std": 0.3012517988681793, "rewards/Evidence_Num_Record/mean": 2.8333334922790527, "rewards/Evidence_Num_Record/std": 1.7379082441329956, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.34789445996284485, "rewards/VideoAccuracy/std": 0.4493321180343628, "step": 114, "train_speed(iter/s)": 0.01893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2034.0, "completions/mean_length": 454.23809814453125, "completions/min_length": 208.0, "entropy/max": 0.8984375, "entropy/mean": 0.4140625, "entropy/min": 0.1494140625, "epoch": 0.115, "grad_norm": 1.0701861902019776, "kl": 0.10400390625, "learning_rate": 1.945000818714668e-06, "loss": 0.001130782999098301, "memory(GiB)": 146.12, "reward": 1.4441184997558594, "reward_std": 0.2652636468410492, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.148814857006073, "rewards/EvidenceHallucination/std": 0.2521863281726837, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 5.08305025100708, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3476888835430145, "rewards/VideoAccuracy/std": 0.43469253182411194, "step": 115, "train_speed(iter/s)": 0.018891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 420.0, "completions/mean_length": 298.69049072265625, "completions/min_length": 207.0, "entropy/max": 0.97265625, "entropy/mean": 0.396484375, "entropy/min": 0.2197265625, "epoch": 0.116, "grad_norm": 1.402990372757954, "kl": 0.1533203125, "learning_rate": 1.9439581691395065e-06, "loss": 0.0015466272598132491, "memory(GiB)": 146.12, "reward": 1.3327633142471313, "reward_std": 0.27393803000450134, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1695629358291626, "rewards/EvidenceHallucination/std": 0.31487464904785156, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 0.45468270778656006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.29885080456733704, "rewards/VideoAccuracy/std": 0.45135653018951416, "step": 116, "train_speed(iter/s)": 0.018918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 594.0, "completions/mean_length": 319.26190185546875, "completions/min_length": 209.0, "entropy/max": 0.5546875, "entropy/mean": 0.375, "entropy/min": 0.2177734375, "epoch": 0.117, "grad_norm": 1.1686409064284398, "kl": 0.1455078125, "learning_rate": 1.9429060139160616e-06, "loss": 0.0014689366798847914, "memory(GiB)": 146.12, "reward": 1.408905029296875, "reward_std": 0.11996100842952728, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2803170084953308, "rewards/EvidenceHallucination/std": 0.41000500321388245, "rewards/Evidence_Num_Record/mean": 2.2857143878936768, "rewards/Evidence_Num_Record/std": 0.45722994208335876, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.286175012588501, "rewards/VideoAccuracy/std": 0.4182421863079071, "step": 117, "train_speed(iter/s)": 0.018936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 840.0, "completions/mean_length": 437.90478515625, "completions/min_length": 217.0, "entropy/max": 0.80078125, "entropy/mean": 0.35546875, "entropy/min": 0.154296875, "epoch": 0.118, "grad_norm": 1.3390124812681243, "kl": 0.09521484375, "learning_rate": 1.9418443636395246e-06, "loss": 0.0009612910216674209, "memory(GiB)": 146.12, "reward": 1.8514626026153564, "reward_std": 0.32283729314804077, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31857970356941223, "rewards/EvidenceHallucination/std": 0.3661547005176544, "rewards/Evidence_Num_Record/mean": 2.809523820877075, "rewards/Evidence_Num_Record/std": 0.7066960334777832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6544132828712463, "rewards/VideoAccuracy/std": 0.482687383890152, "step": 118, "train_speed(iter/s)": 0.018925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/mean_length": 289.8571472167969, "completions/min_length": 168.0, "entropy/max": 1.0, "entropy/mean": 0.466796875, "entropy/min": 0.166015625, "epoch": 0.119, "grad_norm": 1.8322000879365274, "kl": 0.14453125, "learning_rate": 1.940773229000702e-06, "loss": 0.0014447685098275542, "memory(GiB)": 146.12, "reward": 1.6284502744674683, "reward_std": 0.4649772346019745, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2851080298423767, "rewards/EvidenceHallucination/std": 0.2923296093940735, "rewards/Evidence_Num_Record/mean": 2.261904716491699, "rewards/Evidence_Num_Record/std": 0.6647766828536987, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5714285969734192, "rewards/VideoAccuracy/std": 0.5008702874183655, "step": 119, "train_speed(iter/s)": 0.018928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/mean_length": 320.8809509277344, "completions/min_length": 209.0, "entropy/max": 0.83203125, "entropy/mean": 0.41796875, "entropy/min": 0.2392578125, "epoch": 0.12, "grad_norm": 1.2953318223478445, "kl": 0.1396484375, "learning_rate": 1.9396926207859082e-06, "loss": 0.0013921773061156273, "memory(GiB)": 146.12, "reward": 1.2641671895980835, "reward_std": 0.3343121409416199, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13318182528018951, "rewards/EvidenceHallucination/std": 0.26720312237739563, "rewards/Evidence_Num_Record/mean": 2.4761905670166016, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.23753078281879425, "rewards/VideoAccuracy/std": 0.430060476064682, "step": 120, "train_speed(iter/s)": 0.018922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/mean_length": 446.40478515625, "completions/min_length": 211.0, "entropy/max": 0.5, "entropy/mean": 0.310546875, "entropy/min": 0.1923828125, "epoch": 0.121, "grad_norm": 1.120987905866189, "kl": 0.09814453125, "learning_rate": 1.9386025498768555e-06, "loss": 0.0011982453288510442, "memory(GiB)": 146.12, "reward": 2.073885917663574, "reward_std": 0.15345653891563416, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42954981327056885, "rewards/EvidenceHallucination/std": 0.38178956508636475, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.7344991564750671, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7879758477210999, "rewards/VideoAccuracy/std": 0.43794405460357666, "step": 121, "train_speed(iter/s)": 0.018874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/mean_length": 369.452392578125, "completions/min_length": 261.0, "entropy/max": 1.3359375, "entropy/mean": 0.53125, "entropy/min": 0.30078125, "epoch": 0.122, "grad_norm": 1.516907029994994, "kl": 0.1396484375, "learning_rate": 1.937503027250546e-06, "loss": 0.0014094719663262367, "memory(GiB)": 146.12, "reward": 1.7915791273117065, "reward_std": 0.22216013073921204, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.445990651845932, "rewards/EvidenceHallucination/std": 0.32774993777275085, "rewards/Evidence_Num_Record/mean": 2.761904716491699, "rewards/Evidence_Num_Record/std": 0.8499504923820496, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7142857313156128, "rewards/VideoAccuracy/std": 0.45722997188568115, "step": 122, "train_speed(iter/s)": 0.018879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.0, "completions/mean_length": 336.952392578125, "completions/min_length": 206.0, "entropy/max": 0.73046875, "entropy/mean": 0.4140625, "entropy/min": 0.2333984375, "epoch": 0.123, "grad_norm": 1.6973598296429866, "kl": 0.1455078125, "learning_rate": 1.9363940639791603e-06, "loss": 0.001456602243706584, "memory(GiB)": 146.12, "reward": 1.4033061265945435, "reward_std": 0.44239187240600586, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18754583597183228, "rewards/EvidenceHallucination/std": 0.31474003195762634, "rewards/Evidence_Num_Record/mean": 2.4761905670166016, "rewards/Evidence_Num_Record/std": 0.6712963581085205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.36579692363739014, "rewards/VideoAccuracy/std": 0.4749639332294464, "step": 123, "train_speed(iter/s)": 0.018884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 661.0, "completions/mean_length": 399.3095397949219, "completions/min_length": 250.0, "entropy/max": 0.5078125, "entropy/mean": 0.37109375, "entropy/min": 0.1845703125, "epoch": 0.124, "grad_norm": 1.407556389216323, "kl": 0.1328125, "learning_rate": 1.9352756712299464e-06, "loss": 0.0013447859091684222, "memory(GiB)": 146.12, "reward": 1.5876822471618652, "reward_std": 0.318477988243103, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19310377538204193, "rewards/EvidenceHallucination/std": 0.3203859031200409, "rewards/Evidence_Num_Record/mean": 2.7142858505249023, "rewards/Evidence_Num_Record/std": 0.9444501399993896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4157281816005707, "rewards/VideoAccuracy/std": 0.4557120203971863, "step": 124, "train_speed(iter/s)": 0.018918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 830.0, "completions/mean_length": 471.9761962890625, "completions/min_length": 222.0, "entropy/max": 1.7265625, "entropy/mean": 0.55078125, "entropy/min": 0.1865234375, "epoch": 0.125, "grad_norm": 1.4369879104798542, "kl": 0.115234375, "learning_rate": 1.9341478602651067e-06, "loss": 0.0011676698923110962, "memory(GiB)": 146.12, "reward": 1.4673373699188232, "reward_std": 0.3999050557613373, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1500246524810791, "rewards/EvidenceHallucination/std": 0.2782667279243469, "rewards/Evidence_Num_Record/mean": 3.047619104385376, "rewards/Evidence_Num_Record/std": 1.0109734535217285, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.3706657886505127, "rewards/VideoAccuracy/std": 0.4459346830844879, "step": 125, "train_speed(iter/s)": 0.018913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/mean_length": 332.76190185546875, "completions/min_length": 198.0, "entropy/max": 0.69140625, "entropy/mean": 0.400390625, "entropy/min": 0.23046875, "epoch": 0.126, "grad_norm": 1.5109059253680701, "kl": 0.1552734375, "learning_rate": 1.933010642441685e-06, "loss": 0.0015631616115570068, "memory(GiB)": 146.12, "reward": 1.7317975759506226, "reward_std": 0.3719199299812317, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3935978412628174, "rewards/EvidenceHallucination/std": 0.3575633764266968, "rewards/Evidence_Num_Record/mean": 2.5238096714019775, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6530779004096985, "rewards/VideoAccuracy/std": 0.471064954996109, "step": 126, "train_speed(iter/s)": 0.018898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 295.3095397949219, "completions/min_length": 199.0, "entropy/max": 0.58984375, "entropy/mean": 0.380859375, "entropy/min": 0.150390625, "epoch": 0.127, "grad_norm": 1.2879849893868394, "kl": 0.1591796875, "learning_rate": 1.9318640292114524e-06, "loss": 0.0018035446992143989, "memory(GiB)": 146.12, "reward": 1.565708041191101, "reward_std": 0.11706583946943283, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28319069743156433, "rewards/EvidenceHallucination/std": 0.3761787414550781, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 0.5054867267608643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4424029290676117, "rewards/VideoAccuracy/std": 0.4586055278778076, "step": 127, "train_speed(iter/s)": 0.01891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 722.0, "completions/mean_length": 483.8571472167969, "completions/min_length": 227.0, "entropy/max": 0.96484375, "entropy/mean": 0.392578125, "entropy/min": 0.169921875, "epoch": 0.128, "grad_norm": 1.2421907043489904, "kl": 0.10107421875, "learning_rate": 1.930708032120791e-06, "loss": 0.0010285605676472187, "memory(GiB)": 146.12, "reward": 1.8038581609725952, "reward_std": 0.32695096731185913, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3501226305961609, "rewards/EvidenceHallucination/std": 0.3542228043079376, "rewards/Evidence_Num_Record/mean": 3.2142858505249023, "rewards/Evidence_Num_Record/std": 0.9761975407600403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.600500226020813, "rewards/VideoAccuracy/std": 0.43714261054992676, "step": 128, "train_speed(iter/s)": 0.018909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 617.0, "completions/mean_length": 401.5, "completions/min_length": 258.0, "entropy/max": 0.88671875, "entropy/mean": 0.466796875, "entropy/min": 0.26953125, "epoch": 0.129, "grad_norm": 1.4672836541554346, "kl": 0.1474609375, "learning_rate": 1.929542662810579e-06, "loss": 0.0014828164130449295, "memory(GiB)": 146.12, "reward": 1.5887269973754883, "reward_std": 0.4017311930656433, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32458657026290894, "rewards/EvidenceHallucination/std": 0.33995798230171204, "rewards/Evidence_Num_Record/mean": 2.857142925262451, "rewards/Evidence_Num_Record/std": 0.6833000779151917, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.523809552192688, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 129, "train_speed(iter/s)": 0.018908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 409.0, "completions/mean_length": 297.21429443359375, "completions/min_length": 223.0, "entropy/max": 0.55078125, "entropy/mean": 0.365234375, "entropy/min": 0.2177734375, "epoch": 0.13, "grad_norm": 1.3237343949509217, "kl": 0.1650390625, "learning_rate": 1.9283679330160725e-06, "loss": 0.001645385636948049, "memory(GiB)": 146.12, "reward": 1.3768378496170044, "reward_std": 0.21993836760520935, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1668916642665863, "rewards/EvidenceHallucination/std": 0.27208632230758667, "rewards/Evidence_Num_Record/mean": 2.309523820877075, "rewards/Evidence_Num_Record/std": 0.5174088478088379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3434593677520752, "rewards/VideoAccuracy/std": 0.4569014608860016, "step": 130, "train_speed(iter/s)": 0.018925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 791.0, "completions/mean_length": 509.9761962890625, "completions/min_length": 288.0, "entropy/max": 0.890625, "entropy/mean": 0.33203125, "entropy/min": 0.1142578125, "epoch": 0.131, "grad_norm": 0.9172565634820042, "kl": 0.10009765625, "learning_rate": 1.9271838545667875e-06, "loss": 0.0014220774173736572, "memory(GiB)": 146.12, "reward": 1.824385166168213, "reward_std": 0.15645675361156464, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.295434832572937, "rewards/EvidenceHallucination/std": 0.3719170093536377, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.7419721484184265, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.565298318862915, "rewards/VideoAccuracy/std": 0.5190615057945251, "step": 131, "train_speed(iter/s)": 0.018919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 967.0, "completions/mean_length": 436.0238037109375, "completions/min_length": 213.0, "entropy/max": 1.4765625, "entropy/mean": 0.53515625, "entropy/min": 0.25390625, "epoch": 0.132, "grad_norm": 1.272509803292311, "kl": 0.1494140625, "learning_rate": 1.92599043938638e-06, "loss": 0.0015272954478859901, "memory(GiB)": 146.12, "reward": 1.377498984336853, "reward_std": 0.3350825309753418, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2208285629749298, "rewards/EvidenceHallucination/std": 0.3242204487323761, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 1.4323700666427612, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 132, "train_speed(iter/s)": 0.018882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.0, "completions/mean_length": 312.1190490722656, "completions/min_length": 199.0, "entropy/max": 1.03125, "entropy/mean": 0.447265625, "entropy/min": 0.2421875, "epoch": 0.133, "grad_norm": 1.251189024443508, "kl": 0.1640625, "learning_rate": 1.924787699492529e-06, "loss": 0.0016607262659817934, "memory(GiB)": 146.12, "reward": 1.2626943588256836, "reward_std": 0.12782520055770874, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11290771514177322, "rewards/EvidenceHallucination/std": 0.2381616085767746, "rewards/Evidence_Num_Record/mean": 2.190476179122925, "rewards/Evidence_Num_Record/std": 0.6712963581085205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.240112766623497, "rewards/VideoAccuracy/std": 0.40528300404548645, "step": 133, "train_speed(iter/s)": 0.018906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 645.0, "completions/mean_length": 407.9761962890625, "completions/min_length": 276.0, "entropy/max": 0.87890625, "entropy/mean": 0.4140625, "entropy/min": 0.181640625, "epoch": 0.134, "grad_norm": 1.4381078327036072, "kl": 0.1474609375, "learning_rate": 1.923575646996811e-06, "loss": 0.0014882514951750636, "memory(GiB)": 146.12, "reward": 1.883029818534851, "reward_std": 0.2080840915441513, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22200721502304077, "rewards/EvidenceHallucination/std": 0.33083653450012207, "rewards/Evidence_Num_Record/mean": 2.5714285373687744, "rewards/Evidence_Num_Record/std": 0.547404408454895, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7052949070930481, "rewards/VideoAccuracy/std": 0.469309002161026, "step": 134, "train_speed(iter/s)": 0.018913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 746.0, "completions/mean_length": 506.73809814453125, "completions/min_length": 281.0, "entropy/max": 1.75, "entropy/mean": 0.57421875, "entropy/min": 0.11767578125, "epoch": 0.135, "grad_norm": 1.2160665784268834, "kl": 0.12353515625, "learning_rate": 1.9223542941045815e-06, "loss": 0.0012557308655232191, "memory(GiB)": 146.12, "reward": 1.589491367340088, "reward_std": 0.43333733081817627, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2434537559747696, "rewards/EvidenceHallucination/std": 0.32277417182922363, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.8050364255905151, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.47413405776023865, "rewards/VideoAccuracy/std": 0.44823360443115234, "step": 135, "train_speed(iter/s)": 0.018885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/mean_length": 361.6190490722656, "completions/min_length": 225.0, "entropy/max": 0.69921875, "entropy/mean": 0.43359375, "entropy/min": 0.224609375, "epoch": 0.136, "grad_norm": 1.4554467565915103, "kl": 0.171875, "learning_rate": 1.92112365311485e-06, "loss": 0.0017367280088365078, "memory(GiB)": 146.12, "reward": 1.5271995067596436, "reward_std": 0.2954389452934265, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23232774436473846, "rewards/EvidenceHallucination/std": 0.28536492586135864, "rewards/Evidence_Num_Record/mean": 2.595238208770752, "rewards/Evidence_Num_Record/std": 0.6270147562026978, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4807339608669281, "rewards/VideoAccuracy/std": 0.5015210509300232, "step": 136, "train_speed(iter/s)": 0.018897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 693.0, "completions/mean_length": 400.16668701171875, "completions/min_length": 260.0, "entropy/max": 0.515625, "entropy/mean": 0.392578125, "entropy/min": 0.2392578125, "epoch": 0.137, "grad_norm": 1.463680950683056, "kl": 0.17578125, "learning_rate": 1.9198837364201583e-06, "loss": 0.0017780549824237823, "memory(GiB)": 146.12, "reward": 1.5801057815551758, "reward_std": 0.18565520644187927, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31667351722717285, "rewards/EvidenceHallucination/std": 0.40294042229652405, "rewards/Evidence_Num_Record/mean": 2.809523820877075, "rewards/Evidence_Num_Record/std": 0.7726449966430664, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.45010441541671753, "rewards/VideoAccuracy/std": 0.4757658541202545, "step": 137, "train_speed(iter/s)": 0.018911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1106.0, "completions/mean_length": 573.2142944335938, "completions/min_length": 303.0, "entropy/max": 1.328125, "entropy/mean": 0.447265625, "entropy/min": 0.115234375, "epoch": 0.138, "grad_norm": 1.3228258962917756, "kl": 0.10791015625, "learning_rate": 1.9186345565064534e-06, "loss": 0.0010970378061756492, "memory(GiB)": 146.12, "reward": 1.7937687635421753, "reward_std": 0.3511067032814026, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.269847571849823, "rewards/EvidenceHallucination/std": 0.3559637665748596, "rewards/Evidence_Num_Record/mean": 3.309523820877075, "rewards/Evidence_Num_Record/std": 0.7804968953132629, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6112276911735535, "rewards/VideoAccuracy/std": 0.42789316177368164, "step": 138, "train_speed(iter/s)": 0.018887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 668.0, "completions/mean_length": 409.4285888671875, "completions/min_length": 236.0, "entropy/max": 0.9765625, "entropy/mean": 0.451171875, "entropy/min": 0.2177734375, "epoch": 0.139, "grad_norm": 1.370286784743664, "kl": 0.171875, "learning_rate": 1.9173761259529635e-06, "loss": 0.0017487120348960161, "memory(GiB)": 146.12, "reward": 1.6621686220169067, "reward_std": 0.28146690130233765, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33465176820755005, "rewards/EvidenceHallucination/std": 0.32503581047058105, "rewards/Evidence_Num_Record/mean": 2.809523820877075, "rewards/Evidence_Num_Record/std": 0.7066960334777832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 139, "train_speed(iter/s)": 0.018906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 597.0, "completions/mean_length": 384.5, "completions/min_length": 267.0, "entropy/max": 0.63671875, "entropy/mean": 0.3984375, "entropy/min": 0.251953125, "epoch": 0.14, "grad_norm": 1.421840708779392, "kl": 0.1748046875, "learning_rate": 1.9161084574320692e-06, "loss": 0.0017522408161312342, "memory(GiB)": 146.12, "reward": 1.4421076774597168, "reward_std": 0.21170346438884735, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28149551153182983, "rewards/EvidenceHallucination/std": 0.3998582363128662, "rewards/Evidence_Num_Record/mean": 2.857142925262451, "rewards/Evidence_Num_Record/std": 0.6466208100318909, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3858085870742798, "rewards/VideoAccuracy/std": 0.46080002188682556, "step": 140, "train_speed(iter/s)": 0.018897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 967.0, "completions/mean_length": 508.1428527832031, "completions/min_length": 250.0, "entropy/max": 0.8671875, "entropy/mean": 0.33984375, "entropy/min": 0.11572265625, "epoch": 0.141, "grad_norm": 0.9979565047966674, "kl": 0.11572265625, "learning_rate": 1.91483156370918e-06, "loss": 0.0013793944381177425, "memory(GiB)": 146.12, "reward": 2.0592873096466064, "reward_std": 0.10511618852615356, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4793519973754883, "rewards/EvidenceHallucination/std": 0.37820374965667725, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.9385906457901001, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7634169459342957, "rewards/VideoAccuracy/std": 0.3961738646030426, "step": 141, "train_speed(iter/s)": 0.018916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 745.0, "completions/mean_length": 470.90478515625, "completions/min_length": 286.0, "entropy/max": 1.7265625, "entropy/mean": 0.578125, "entropy/min": 0.259765625, "epoch": 0.142, "grad_norm": 1.2861904590160196, "kl": 0.16796875, "learning_rate": 1.9135454576426007e-06, "loss": 0.0017059104284271598, "memory(GiB)": 146.12, "reward": 1.5634406805038452, "reward_std": 0.34225767850875854, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3172035217285156, "rewards/EvidenceHallucination/std": 0.3703348636627197, "rewards/Evidence_Num_Record/mean": 3.0, "rewards/Evidence_Num_Record/std": 0.7963330745697021, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5, "rewards/VideoAccuracy/std": 0.5060608386993408, "step": 142, "train_speed(iter/s)": 0.018901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 536.0, "completions/mean_length": 395.9047546386719, "completions/min_length": 265.0, "entropy/max": 0.55859375, "entropy/mean": 0.388671875, "entropy/min": 0.2265625, "epoch": 0.143, "grad_norm": 1.304361941814836, "kl": 0.1533203125, "learning_rate": 1.912250152183405e-06, "loss": 0.0015516172861680388, "memory(GiB)": 146.12, "reward": 1.2907675504684448, "reward_std": 0.37060222029685974, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14767104387283325, "rewards/EvidenceHallucination/std": 0.3083168566226959, "rewards/Evidence_Num_Record/mean": 2.9761905670166016, "rewards/Evidence_Num_Record/std": 1.1788398027420044, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.261233389377594, "rewards/VideoAccuracy/std": 0.44386783242225647, "step": 143, "train_speed(iter/s)": 0.01892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/mean_length": 545.5, "completions/min_length": 242.0, "entropy/max": 0.65234375, "entropy/mean": 0.37890625, "entropy/min": 0.2021484375, "epoch": 0.144, "grad_norm": 1.095160698262503, "kl": 0.1591796875, "learning_rate": 1.910945660375305e-06, "loss": 0.0016175673808902502, "memory(GiB)": 146.12, "reward": 1.6900311708450317, "reward_std": 0.21441495418548584, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3284996449947357, "rewards/EvidenceHallucination/std": 0.3852420151233673, "rewards/Evidence_Num_Record/mean": 2.952380895614624, "rewards/Evidence_Num_Record/std": 0.961512565612793, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4909977316856384, "rewards/VideoAccuracy/std": 0.4495478868484497, "step": 144, "train_speed(iter/s)": 0.018899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 642.047607421875, "completions/min_length": 350.0, "entropy/max": 1.9140625, "entropy/mean": 0.45703125, "entropy/min": 0.04736328125, "epoch": 0.145, "grad_norm": 1.6791630968254418, "kl": 0.2080078125, "learning_rate": 1.9096319953545185e-06, "loss": 0.0015657602343708277, "memory(GiB)": 146.12, "reward": 1.7518750429153442, "reward_std": 0.31135857105255127, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3737892210483551, "rewards/EvidenceHallucination/std": 0.3780079483985901, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.9891983270645142, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6223553419113159, "rewards/VideoAccuracy/std": 0.4546234607696533, "step": 145, "train_speed(iter/s)": 0.018848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 558.0, "completions/mean_length": 372.26190185546875, "completions/min_length": 224.0, "entropy/max": 0.77734375, "entropy/mean": 0.41015625, "entropy/min": 0.21875, "epoch": 0.146, "grad_norm": 1.3850014837970217, "kl": 0.1982421875, "learning_rate": 1.908309170349637e-06, "loss": 0.0019767656922340393, "memory(GiB)": 146.12, "reward": 1.623852014541626, "reward_std": 0.3224193751811981, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3113335371017456, "rewards/EvidenceHallucination/std": 0.3489803671836853, "rewards/Evidence_Num_Record/mean": 2.690476179122925, "rewards/Evidence_Num_Record/std": 0.6803189516067505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5615853071212769, "rewards/VideoAccuracy/std": 0.4945571720600128, "step": 146, "train_speed(iter/s)": 0.018855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/mean_length": 394.6428527832031, "completions/min_length": 251.0, "entropy/max": 0.54296875, "entropy/mean": 0.396484375, "entropy/min": 0.2080078125, "epoch": 0.147, "grad_norm": 1.246688801187698, "kl": 0.17578125, "learning_rate": 1.9069771986814948e-06, "loss": 0.001971013844013214, "memory(GiB)": 146.12, "reward": 1.4567680358886719, "reward_std": 0.1992015838623047, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21758872270584106, "rewards/EvidenceHallucination/std": 0.3374274969100952, "rewards/Evidence_Num_Record/mean": 2.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7741467356681824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.34658363461494446, "rewards/VideoAccuracy/std": 0.4328537881374359, "step": 147, "train_speed(iter/s)": 0.018876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/mean_length": 662.4285888671875, "completions/min_length": 333.0, "entropy/max": 1.453125, "entropy/mean": 0.35546875, "entropy/min": 0.1220703125, "epoch": 0.148, "grad_norm": 1.0689311318311303, "kl": 0.103515625, "learning_rate": 1.9056360937630308e-06, "loss": 0.001046686084009707, "memory(GiB)": 146.12, "reward": 1.8244465589523315, "reward_std": 0.2906140685081482, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.382415235042572, "rewards/EvidenceHallucination/std": 0.3936961889266968, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.3580747842788696, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6193921566009521, "rewards/VideoAccuracy/std": 0.5261656045913696, "step": 148, "train_speed(iter/s)": 0.018862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 766.0, "completions/mean_length": 440.69049072265625, "completions/min_length": 274.0, "entropy/max": 0.9375, "entropy/mean": 0.462890625, "entropy/min": 0.328125, "epoch": 0.149, "grad_norm": 1.4597438882707154, "kl": 0.1767578125, "learning_rate": 1.9042858690991573e-06, "loss": 0.0017879819497466087, "memory(GiB)": 146.12, "reward": 1.8150097131729126, "reward_std": 0.3396349847316742, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3845720887184143, "rewards/EvidenceHallucination/std": 0.3300588130950928, "rewards/Evidence_Num_Record/mean": 2.9761905670166016, "rewards/Evidence_Num_Record/std": 0.8406761288642883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.738095223903656, "rewards/VideoAccuracy/std": 0.44500064849853516, "step": 149, "train_speed(iter/s)": 0.018835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 500.0, "completions/mean_length": 376.73809814453125, "completions/min_length": 270.0, "entropy/max": 0.6171875, "entropy/mean": 0.431640625, "entropy/min": 0.298828125, "epoch": 0.15, "grad_norm": 1.4397330213071107, "kl": 0.203125, "learning_rate": 1.9029265382866213e-06, "loss": 0.00204599485732615, "memory(GiB)": 146.12, "reward": 1.2717986106872559, "reward_std": 0.3696291446685791, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08266858756542206, "rewards/EvidenceHallucination/std": 0.22534288465976715, "rewards/Evidence_Num_Record/mean": 2.547619104385376, "rewards/Evidence_Num_Record/std": 0.66999751329422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2552648186683655, "rewards/VideoAccuracy/std": 0.37430235743522644, "step": 150, "train_speed(iter/s)": 0.018849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 975.0, "completions/mean_length": 588.8333740234375, "completions/min_length": 330.0, "entropy/max": 0.64453125, "entropy/mean": 0.34765625, "entropy/min": 0.10498046875, "epoch": 0.151, "grad_norm": 1.2760505828928683, "kl": 0.11474609375, "learning_rate": 1.901558115013869e-06, "loss": 0.0011610446963459253, "memory(GiB)": 146.12, "reward": 2.082760810852051, "reward_std": 0.2610321044921875, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4390549659729004, "rewards/EvidenceHallucination/std": 0.35406696796417236, "rewards/Evidence_Num_Record/mean": 3.0714285373687744, "rewards/Evidence_Num_Record/std": 0.6768959760665894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.7997115850448608, "rewards/VideoAccuracy/std": 0.4025106430053711, "step": 151, "train_speed(iter/s)": 0.018839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 850.0, "completions/mean_length": 493.19049072265625, "completions/min_length": 322.0, "entropy/max": 1.1484375, "entropy/mean": 0.5390625, "entropy/min": 0.265625, "epoch": 0.152, "grad_norm": 1.3738644394782378, "kl": 0.1865234375, "learning_rate": 1.9001806130609077e-06, "loss": 0.0018724360270425677, "memory(GiB)": 146.12, "reward": 1.5372685194015503, "reward_std": 0.45580458641052246, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3053901493549347, "rewards/EvidenceHallucination/std": 0.3542548418045044, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.7696326971054077, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4761904776096344, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 152, "train_speed(iter/s)": 0.01885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 592.0, "completions/mean_length": 395.5714416503906, "completions/min_length": 253.0, "entropy/max": 0.9140625, "entropy/mean": 0.455078125, "entropy/min": 0.2490234375, "epoch": 0.153, "grad_norm": 1.2599000503893267, "kl": 0.1953125, "learning_rate": 1.8987940462991669e-06, "loss": 0.0019733128137886524, "memory(GiB)": 146.12, "reward": 1.5792686939239502, "reward_std": 0.20565973222255707, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3271608352661133, "rewards/EvidenceHallucination/std": 0.3908017575740814, "rewards/Evidence_Num_Record/mean": 2.809523820877075, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5138365626335144, "rewards/VideoAccuracy/std": 0.47636887431144714, "step": 153, "train_speed(iter/s)": 0.01886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/mean_length": 494.952392578125, "completions/min_length": 242.0, "entropy/max": 0.52734375, "entropy/mean": 0.365234375, "entropy/min": 0.171875, "epoch": 0.154, "grad_norm": 1.375291943581688, "kl": 0.1650390625, "learning_rate": 1.8973984286913583e-06, "loss": 0.0018742814427241683, "memory(GiB)": 146.12, "reward": 1.7530419826507568, "reward_std": 0.14951765537261963, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.324443519115448, "rewards/EvidenceHallucination/std": 0.3472951352596283, "rewards/Evidence_Num_Record/mean": 2.8333334922790527, "rewards/Evidence_Num_Record/std": 0.7297399044036865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5548199415206909, "rewards/VideoAccuracy/std": 0.4582982361316681, "step": 154, "train_speed(iter/s)": 0.018859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 909.0, "completions/mean_length": 511.76190185546875, "completions/min_length": 252.0, "entropy/max": 1.3203125, "entropy/mean": 0.50390625, "entropy/min": 0.1259765625, "epoch": 0.155, "grad_norm": 1.2741065524336352, "kl": 0.154296875, "learning_rate": 1.8959937742913357e-06, "loss": 0.0015646511455997825, "memory(GiB)": 146.12, "reward": 1.5881472826004028, "reward_std": 0.31238678097724915, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2351822406053543, "rewards/EvidenceHallucination/std": 0.38644734025001526, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 1.0873574018478394, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.47444427013397217, "rewards/VideoAccuracy/std": 0.5682789087295532, "step": 155, "train_speed(iter/s)": 0.018856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.0, "completions/mean_length": 366.1190490722656, "completions/min_length": 264.0, "entropy/max": 0.55859375, "entropy/mean": 0.390625, "entropy/min": 0.291015625, "epoch": 0.156, "grad_norm": 1.6022103852657883, "kl": 0.2021484375, "learning_rate": 1.8945800972439537e-06, "loss": 0.0020145419985055923, "memory(GiB)": 146.12, "reward": 1.6180630922317505, "reward_std": 0.3773413300514221, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3580820858478546, "rewards/EvidenceHallucination/std": 0.3930751085281372, "rewards/Evidence_Num_Record/mean": 2.690476179122925, "rewards/Evidence_Num_Record/std": 0.7152722477912903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.546446681022644, "rewards/VideoAccuracy/std": 0.5027092099189758, "step": 156, "train_speed(iter/s)": 0.018853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 569.0, "completions/mean_length": 389.9047546386719, "completions/min_length": 246.0, "entropy/max": 0.5546875, "entropy/mean": 0.423828125, "entropy/min": 0.318359375, "epoch": 0.157, "grad_norm": 1.4581570659736076, "kl": 0.19140625, "learning_rate": 1.8931574117849238e-06, "loss": 0.0019151073647662997, "memory(GiB)": 146.12, "reward": 1.7230446338653564, "reward_std": 0.22620552778244019, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3397175073623657, "rewards/EvidenceHallucination/std": 0.3540821075439453, "rewards/Evidence_Num_Record/mean": 2.761904716491699, "rewards/Evidence_Num_Record/std": 0.6172133684158325, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5884343981742859, "rewards/VideoAccuracy/std": 0.47797274589538574, "step": 157, "train_speed(iter/s)": 0.018866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1030.0, "completions/mean_length": 554.0238037109375, "completions/min_length": 305.0, "entropy/max": 1.3046875, "entropy/mean": 0.375, "entropy/min": 0.12451171875, "epoch": 0.158, "grad_norm": 1.0588912913862258, "kl": 0.12109375, "learning_rate": 1.8917257322406732e-06, "loss": 0.0014288020320236683, "memory(GiB)": 146.12, "reward": 1.8552395105361938, "reward_std": 0.1809217780828476, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2964218258857727, "rewards/EvidenceHallucination/std": 0.3188343048095703, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.662621796131134, "rewards/VideoAccuracy/std": 0.40555980801582336, "step": 158, "train_speed(iter/s)": 0.018863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/mean_length": 476.4285888671875, "completions/min_length": 277.0, "entropy/max": 2.0, "entropy/mean": 0.54296875, "entropy/min": 0.28515625, "epoch": 0.159, "grad_norm": 1.4483058805078088, "kl": 0.1953125, "learning_rate": 1.8902850730281989e-06, "loss": 0.001996553037315607, "memory(GiB)": 146.12, "reward": 1.9349104166030884, "reward_std": 0.29467448592185974, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5078843235969543, "rewards/EvidenceHallucination/std": 0.3710263967514038, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 1.605196237564087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8333333730697632, "rewards/VideoAccuracy/std": 0.37719547748565674, "step": 159, "train_speed(iter/s)": 0.018853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/mean_length": 391.5, "completions/min_length": 244.0, "entropy/max": 0.78515625, "entropy/mean": 0.416015625, "entropy/min": 0.2333984375, "epoch": 0.16, "grad_norm": 1.3757762186406606, "kl": 0.21484375, "learning_rate": 1.8888354486549234e-06, "loss": 0.0021661133505403996, "memory(GiB)": 146.12, "reward": 1.4399813413619995, "reward_std": 0.225159153342247, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23081883788108826, "rewards/EvidenceHallucination/std": 0.34395676851272583, "rewards/Evidence_Num_Record/mean": 2.9285714626312256, "rewards/Evidence_Num_Record/std": 0.7120173573493958, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.39381763339042664, "rewards/VideoAccuracy/std": 0.4535292387008667, "step": 160, "train_speed(iter/s)": 0.018859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/mean_length": 572.7142944335938, "completions/min_length": 309.0, "entropy/max": 0.6015625, "entropy/mean": 0.314453125, "entropy/min": 0.12255859375, "epoch": 0.161, "grad_norm": 1.218305374780841, "kl": 0.1279296875, "learning_rate": 1.8873768737185478e-06, "loss": 0.0012988243252038956, "memory(GiB)": 146.12, "reward": 2.1747634410858154, "reward_std": 0.13627514243125916, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4461142420768738, "rewards/EvidenceHallucination/std": 0.3420378565788269, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.9865530133247375, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8855405449867249, "rewards/VideoAccuracy/std": 0.18643184006214142, "step": 161, "train_speed(iter/s)": 0.01882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 873.0, "completions/mean_length": 485.8333435058594, "completions/min_length": 229.0, "entropy/max": 1.421875, "entropy/mean": 0.53125, "entropy/min": 0.29296875, "epoch": 0.162, "grad_norm": 1.407074539091761, "kl": 0.2021484375, "learning_rate": 1.8859093629069056e-06, "loss": 0.002040162682533264, "memory(GiB)": 146.12, "reward": 1.7553484439849854, "reward_std": 0.35698962211608887, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4434085786342621, "rewards/EvidenceHallucination/std": 0.3551959693431854, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 1.1699390411376953, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 162, "train_speed(iter/s)": 0.018837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 684.0, "completions/mean_length": 397.1428527832031, "completions/min_length": 269.0, "entropy/max": 1.046875, "entropy/mean": 0.447265625, "entropy/min": 0.25390625, "epoch": 0.163, "grad_norm": 1.1432921146706652, "kl": 0.2099609375, "learning_rate": 1.8844329309978143e-06, "loss": 0.0021164161153137684, "memory(GiB)": 146.12, "reward": 1.4413707256317139, "reward_std": 0.2937353849411011, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24317666888237, "rewards/EvidenceHallucination/std": 0.36700868606567383, "rewards/Evidence_Num_Record/mean": 2.761904716491699, "rewards/Evidence_Num_Record/std": 0.5763435363769531, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3927353620529175, "rewards/VideoAccuracy/std": 0.47045132517814636, "step": 163, "train_speed(iter/s)": 0.018824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/mean_length": 437.0714416503906, "completions/min_length": 277.0, "entropy/max": 0.6015625, "entropy/mean": 0.373046875, "entropy/min": 0.1748046875, "epoch": 0.164, "grad_norm": 1.0396406883407332, "kl": 0.1826171875, "learning_rate": 1.8829475928589268e-06, "loss": 0.0020347093231976032, "memory(GiB)": 146.12, "reward": 1.7839725017547607, "reward_std": 0.04425010085105896, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40866315364837646, "rewards/EvidenceHallucination/std": 0.37608444690704346, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 1.2699053287506104, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5689064860343933, "rewards/VideoAccuracy/std": 0.42913714051246643, "step": 164, "train_speed(iter/s)": 0.018841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1088.0, "completions/mean_length": 554.1666870117188, "completions/min_length": 250.0, "entropy/max": 1.7421875, "entropy/mean": 0.58203125, "entropy/min": 0.1220703125, "epoch": 0.165, "grad_norm": 1.1222930350902727, "kl": 0.154296875, "learning_rate": 1.881453363447582e-06, "loss": 0.0015880585415288806, "memory(GiB)": 146.12, "reward": 1.9694316387176514, "reward_std": 0.30417174100875854, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5211167931556702, "rewards/EvidenceHallucination/std": 0.3707961440086365, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 1.2195179462432861, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7985416054725647, "rewards/VideoAccuracy/std": 0.37672126293182373, "step": 165, "train_speed(iter/s)": 0.018829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 613.0, "completions/mean_length": 397.6190490722656, "completions/min_length": 274.0, "entropy/max": 0.5625, "entropy/mean": 0.41796875, "entropy/min": 0.28125, "epoch": 0.166, "grad_norm": 1.1794028567931532, "kl": 0.2177734375, "learning_rate": 1.8799502578106532e-06, "loss": 0.0021678453776985407, "memory(GiB)": 146.12, "reward": 1.5557883977890015, "reward_std": 0.0970194861292839, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35668614506721497, "rewards/EvidenceHallucination/std": 0.39013928174972534, "rewards/Evidence_Num_Record/mean": 3.0238096714019775, "rewards/Evidence_Num_Record/std": 0.7485952973365784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.484451025724411, "rewards/VideoAccuracy/std": 0.49885839223861694, "step": 166, "train_speed(iter/s)": 0.018822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 668.0, "completions/mean_length": 372.23809814453125, "completions/min_length": 244.0, "entropy/max": 0.59765625, "entropy/mean": 0.384765625, "entropy/min": 0.263671875, "epoch": 0.167, "grad_norm": 1.2363471449646588, "kl": 0.2197265625, "learning_rate": 1.8784382910843975e-06, "loss": 0.002421202138066292, "memory(GiB)": 146.12, "reward": 1.3132166862487793, "reward_std": 0.1221097931265831, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14486762881278992, "rewards/EvidenceHallucination/std": 0.319282203912735, "rewards/Evidence_Num_Record/mean": 2.7857143878936768, "rewards/Evidence_Num_Record/std": 0.8125753402709961, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.21757635474205017, "rewards/VideoAccuracy/std": 0.3917587995529175, "step": 167, "train_speed(iter/s)": 0.018835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 803.0, "completions/mean_length": 514.0238037109375, "completions/min_length": 317.0, "entropy/max": 1.4375, "entropy/mean": 0.421875, "entropy/min": 0.1376953125, "epoch": 0.168, "grad_norm": 0.8942918327959201, "kl": 0.1455078125, "learning_rate": 1.8769174784943029e-06, "loss": 0.0014676781138405204, "memory(GiB)": 146.12, "reward": 1.5616717338562012, "reward_std": 0.07717376947402954, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14648951590061188, "rewards/EvidenceHallucination/std": 0.30986353754997253, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 1.087624430656433, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.39904049038887024, "rewards/VideoAccuracy/std": 0.4326857328414917, "step": 168, "train_speed(iter/s)": 0.018832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 562.0, "completions/mean_length": 399.5714416503906, "completions/min_length": 232.0, "entropy/max": 0.70703125, "entropy/mean": 0.439453125, "entropy/min": 0.283203125, "epoch": 0.169, "grad_norm": 1.160434657100405, "kl": 0.259765625, "learning_rate": 1.8753878353549355e-06, "loss": 0.0026328868698328733, "memory(GiB)": 146.12, "reward": 1.3136122226715088, "reward_std": 0.21220001578330994, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19901356101036072, "rewards/EvidenceHallucination/std": 0.3090404272079468, "rewards/Evidence_Num_Record/mean": 3.309523820877075, "rewards/Evidence_Num_Record/std": 0.8111447691917419, "rewards/Format/mean": 0.8333333730697632, "rewards/Format/std": 0.37719547748565674, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496562242507935, "step": 169, "train_speed(iter/s)": 0.018843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/mean_length": 374.8333435058594, "completions/min_length": 278.0, "entropy/max": 0.55859375, "entropy/mean": 0.431640625, "entropy/min": 0.27734375, "epoch": 0.17, "grad_norm": 1.1330419994071816, "kl": 0.2294921875, "learning_rate": 1.873849377069785e-06, "loss": 0.002298670820891857, "memory(GiB)": 146.12, "reward": 1.2922805547714233, "reward_std": 0.29446443915367126, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22717873752117157, "rewards/EvidenceHallucination/std": 0.3880119323730469, "rewards/Evidence_Num_Record/mean": 2.9761905670166016, "rewards/Evidence_Num_Record/std": 0.7485953569412231, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.24684475362300873, "rewards/VideoAccuracy/std": 0.41650208830833435, "step": 170, "train_speed(iter/s)": 0.018827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 767.0, "completions/mean_length": 486.90478515625, "completions/min_length": 318.0, "entropy/max": 0.51171875, "entropy/mean": 0.302734375, "entropy/min": 0.10595703125, "epoch": 0.171, "grad_norm": 1.0696445418655214, "kl": 0.1669921875, "learning_rate": 1.8723021191311089e-06, "loss": 0.0018870094791054726, "memory(GiB)": 146.12, "reward": 1.9103014469146729, "reward_std": 0.09925421327352524, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3888092041015625, "rewards/EvidenceHallucination/std": 0.4217150807380676, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.6373014450073242, "rewards/VideoAccuracy/std": 0.4130142033100128, "step": 171, "train_speed(iter/s)": 0.018842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 769.0, "completions/mean_length": 439.5952453613281, "completions/min_length": 306.0, "entropy/max": 1.8671875, "entropy/mean": 0.51953125, "entropy/min": 0.271484375, "epoch": 0.172, "grad_norm": 1.075234487382607, "kl": 0.2373046875, "learning_rate": 1.8707460771197771e-06, "loss": 0.002409239998087287, "memory(GiB)": 146.12, "reward": 1.3555196523666382, "reward_std": 0.20930641889572144, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22997884452342987, "rewards/EvidenceHallucination/std": 0.3749256730079651, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.8611501455307007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 172, "train_speed(iter/s)": 0.018847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.0, "completions/mean_length": 384.0, "completions/min_length": 249.0, "entropy/max": 0.86328125, "entropy/mean": 0.408203125, "entropy/min": 0.275390625, "epoch": 0.173, "grad_norm": 1.2968746910553395, "kl": 0.2451171875, "learning_rate": 1.869181266705116e-06, "loss": 0.002461865544319153, "memory(GiB)": 146.12, "reward": 1.5350407361984253, "reward_std": 0.28135740756988525, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3513045310974121, "rewards/EvidenceHallucination/std": 0.3848385810852051, "rewards/Evidence_Num_Record/mean": 3.1190476417541504, "rewards/Evidence_Num_Record/std": 0.5500501394271851, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.46477967500686646, "rewards/VideoAccuracy/std": 0.47883597016334534, "step": 173, "train_speed(iter/s)": 0.01886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 830.0, "completions/mean_length": 440.3333435058594, "completions/min_length": 290.0, "entropy/max": 1.15625, "entropy/mean": 0.482421875, "entropy/min": 0.138671875, "epoch": 0.174, "grad_norm": 1.8101136368963897, "kl": 0.2109375, "learning_rate": 1.867607703644749e-06, "loss": 0.0021419243421405554, "memory(GiB)": 146.12, "reward": 1.55290949344635, "reward_std": 0.13344544172286987, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2585713565349579, "rewards/EvidenceHallucination/std": 0.3755526840686798, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.8781778812408447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.377385675907135, "rewards/VideoAccuracy/std": 0.48580801486968994, "step": 174, "train_speed(iter/s)": 0.018877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 668.0, "completions/mean_length": 499.5, "completions/min_length": 339.0, "entropy/max": 0.75, "entropy/mean": 0.376953125, "entropy/min": 0.1865234375, "epoch": 0.175, "grad_norm": 1.0080847323448974, "kl": 0.1806640625, "learning_rate": 1.8660254037844386e-06, "loss": 0.0018150052055716515, "memory(GiB)": 146.12, "reward": 1.5558075904846191, "reward_std": 0.2614729702472687, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2841332256793976, "rewards/EvidenceHallucination/std": 0.380180686712265, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.8981203436851501, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.43231427669525146, "rewards/VideoAccuracy/std": 0.4836465120315552, "step": 175, "train_speed(iter/s)": 0.018874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.0, "completions/mean_length": 413.3333435058594, "completions/min_length": 292.0, "entropy/max": 0.6171875, "entropy/mean": 0.3984375, "entropy/min": 0.255859375, "epoch": 0.176, "grad_norm": 1.3365246989631465, "kl": 0.26171875, "learning_rate": 1.8644343830579267e-06, "loss": 0.0026401884388178587, "memory(GiB)": 146.12, "reward": 1.65059232711792, "reward_std": 0.32872068881988525, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39835885167121887, "rewards/EvidenceHallucination/std": 0.36213111877441406, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.49679574370384216, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5709205269813538, "rewards/VideoAccuracy/std": 0.5004269480705261, "step": 176, "train_speed(iter/s)": 0.018869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 629.0, "completions/mean_length": 434.66668701171875, "completions/min_length": 254.0, "entropy/max": 0.70703125, "entropy/mean": 0.4140625, "entropy/min": 0.265625, "epoch": 0.177, "grad_norm": 1.1561926358233798, "kl": 0.2373046875, "learning_rate": 1.8628346574867744e-06, "loss": 0.0023757475428283215, "memory(GiB)": 146.12, "reward": 1.5172730684280396, "reward_std": 0.1766224354505539, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2606974244117737, "rewards/EvidenceHallucination/std": 0.37932664155960083, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.800696611404419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.39846688508987427, "rewards/VideoAccuracy/std": 0.5227741599082947, "step": 177, "train_speed(iter/s)": 0.018884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 763.0, "completions/mean_length": 525.1666870117188, "completions/min_length": 368.0, "entropy/max": 0.68359375, "entropy/mean": 0.337890625, "entropy/min": 0.1171875, "epoch": 0.178, "grad_norm": 1.2254625874796086, "kl": 0.1533203125, "learning_rate": 1.8612262431802006e-06, "loss": 0.0015669530257582664, "memory(GiB)": 146.12, "reward": 2.0970797538757324, "reward_std": 0.25572848320007324, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3758601248264313, "rewards/EvidenceHallucination/std": 0.3398178815841675, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8785083889961243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8885742425918579, "rewards/VideoAccuracy/std": 0.3620957136154175, "step": 178, "train_speed(iter/s)": 0.018881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/mean_length": 500.4285888671875, "completions/min_length": 279.0, "entropy/max": 0.79296875, "entropy/mean": 0.384765625, "entropy/min": 0.154296875, "epoch": 0.179, "grad_norm": 1.261062645250815, "kl": 0.24609375, "learning_rate": 1.859609156334919e-06, "loss": 0.0025176331400871277, "memory(GiB)": 146.12, "reward": 1.5994611978530884, "reward_std": 0.20451365411281586, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37825754284858704, "rewards/EvidenceHallucination/std": 0.38394975662231445, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.9959309101104736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.523809552192688, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 179, "train_speed(iter/s)": 0.018878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/mean_length": 370.4761962890625, "completions/min_length": 280.0, "entropy/max": 0.6328125, "entropy/mean": 0.38671875, "entropy/min": 0.244140625, "epoch": 0.18, "grad_norm": 1.3646103318841316, "kl": 0.263671875, "learning_rate": 1.857983413234977e-06, "loss": 0.0028454954735934734, "memory(GiB)": 146.12, "reward": 1.529546856880188, "reward_std": 0.35319921374320984, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2201814204454422, "rewards/EvidenceHallucination/std": 0.376852422952652, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.4722250998020172, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4855104684829712, "rewards/VideoAccuracy/std": 0.46320217847824097, "step": 180, "train_speed(iter/s)": 0.018891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 773.0, "completions/mean_length": 544.0, "completions/min_length": 305.0, "entropy/max": 0.63671875, "entropy/mean": 0.3359375, "entropy/min": 0.125, "epoch": 0.181, "grad_norm": 1.1193707597104698, "kl": 0.1611328125, "learning_rate": 1.856349030251589e-06, "loss": 0.0016444935463368893, "memory(GiB)": 146.12, "reward": 2.023716449737549, "reward_std": 0.14536447823047638, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34102120995521545, "rewards/EvidenceHallucination/std": 0.3933142125606537, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 1.1087760925292969, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8571428656578064, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.7840837836265564, "rewards/VideoAccuracy/std": 0.4742104113101959, "step": 181, "train_speed(iter/s)": 0.018897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1135.0, "completions/mean_length": 513.047607421875, "completions/min_length": 332.0, "entropy/max": 1.046875, "entropy/mean": 0.484375, "entropy/min": 0.2138671875, "epoch": 0.182, "grad_norm": 1.5659821882077638, "kl": 0.279296875, "learning_rate": 1.8547060238429735e-06, "loss": 0.0028410868253558874, "memory(GiB)": 146.12, "reward": 1.740407943725586, "reward_std": 0.23701772093772888, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4877532720565796, "rewards/EvidenceHallucination/std": 0.3771217167377472, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.1746450662612915, "rewards/Format/mean": 0.9047619104385376, "rewards/Format/std": 0.2971017360687256, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6904761791229248, "rewards/VideoAccuracy/std": 0.4679011106491089, "step": 182, "train_speed(iter/s)": 0.018861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 720.0, "completions/mean_length": 409.0476379394531, "completions/min_length": 243.0, "entropy/max": 0.5625, "entropy/mean": 0.345703125, "entropy/min": 0.09326171875, "epoch": 0.183, "grad_norm": 1.1197768307253084, "kl": 0.29296875, "learning_rate": 1.853054410554187e-06, "loss": 0.0029866299591958523, "memory(GiB)": 146.12, "reward": 1.4120022058486938, "reward_std": 0.3056322932243347, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29936572909355164, "rewards/EvidenceHallucination/std": 0.4119129180908203, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.7543909549713135, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3640337586402893, "rewards/VideoAccuracy/std": 0.4702302813529968, "step": 183, "train_speed(iter/s)": 0.018871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1037.0, "completions/mean_length": 517.8095092773438, "completions/min_length": 321.0, "entropy/max": 0.80859375, "entropy/mean": 0.390625, "entropy/min": 0.1494140625, "epoch": 0.184, "grad_norm": 1.141835746927161, "kl": 0.2421875, "learning_rate": 1.8513942070169568e-06, "loss": 0.0026467707939445972, "memory(GiB)": 146.12, "reward": 1.7807577848434448, "reward_std": 0.2340150773525238, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31633201241493225, "rewards/EvidenceHallucination/std": 0.37761422991752625, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.771516740322113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5714285969734192, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.6032055020332336, "rewards/VideoAccuracy/std": 0.5538917779922485, "step": 184, "train_speed(iter/s)": 0.018871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/mean_length": 661.5952758789062, "completions/min_length": 330.0, "entropy/max": 0.62109375, "entropy/mean": 0.357421875, "entropy/min": 0.109375, "epoch": 0.185, "grad_norm": 1.2075006581057381, "kl": 0.1943359375, "learning_rate": 1.8497254299495145e-06, "loss": 0.001978288171812892, "memory(GiB)": 146.12, "reward": 1.7457631826400757, "reward_std": 0.4265967011451721, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.2883176803588867, "rewards/EvidenceHallucination/std": 0.31130462884902954, "rewards/Evidence_Num_Record/mean": 4.5, "rewards/Evidence_Num_Record/std": 1.5499804019927979, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.6428614854812622, "rewards/VideoAccuracy/std": 0.4330059587955475, "step": 185, "train_speed(iter/s)": 0.018865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 652.0, "completions/mean_length": 452.3333435058594, "completions/min_length": 318.0, "entropy/max": 1.2734375, "entropy/mean": 0.42578125, "entropy/min": 0.259765625, "epoch": 0.186, "grad_norm": 1.2800973114877825, "kl": 0.2890625, "learning_rate": 1.8480480961564257e-06, "loss": 0.002903138054534793, "memory(GiB)": 146.12, "reward": 1.6066404581069946, "reward_std": 0.30020588636398315, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35733169317245483, "rewards/EvidenceHallucination/std": 0.3904092311859131, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.0348178148269653, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5351739525794983, "rewards/VideoAccuracy/std": 0.49298569560050964, "step": 186, "train_speed(iter/s)": 0.018857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1105.0, "completions/mean_length": 494.952392578125, "completions/min_length": 324.0, "entropy/max": 0.83984375, "entropy/mean": 0.421875, "entropy/min": 0.244140625, "epoch": 0.187, "grad_norm": 1.237521066789628, "kl": 0.26953125, "learning_rate": 1.846362222528424e-06, "loss": 0.0027416504453867674, "memory(GiB)": 146.12, "reward": 1.415924310684204, "reward_std": 0.2676616311073303, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20467409491539001, "rewards/EvidenceHallucination/std": 0.3532085418701172, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 2.046072006225586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.32737040519714355, "rewards/VideoAccuracy/std": 0.4514332413673401, "step": 187, "train_speed(iter/s)": 0.018878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 878.0, "completions/mean_length": 571.3095092773438, "completions/min_length": 321.0, "entropy/max": 0.53515625, "entropy/mean": 0.33984375, "entropy/min": 0.11767578125, "epoch": 0.188, "grad_norm": 1.1344193286449087, "kl": 0.1787109375, "learning_rate": 1.8446678260422384e-06, "loss": 0.001828239532187581, "memory(GiB)": 146.12, "reward": 1.9192054271697998, "reward_std": 0.2180202454328537, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40210607647895813, "rewards/EvidenceHallucination/std": 0.36509448289871216, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 1.148902416229248, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.7149747610092163, "rewards/VideoAccuracy/std": 0.42083168029785156, "step": 188, "train_speed(iter/s)": 0.018871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 762.0, "completions/mean_length": 479.952392578125, "completions/min_length": 328.0, "entropy/max": 0.79296875, "entropy/mean": 0.404296875, "entropy/min": 0.2412109375, "epoch": 0.189, "grad_norm": 1.2312451315706627, "kl": 0.294921875, "learning_rate": 1.8429649237604214e-06, "loss": 0.0029576425440609455, "memory(GiB)": 146.12, "reward": 1.6795845031738281, "reward_std": 0.22131046652793884, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42173194885253906, "rewards/EvidenceHallucination/std": 0.36969906091690063, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.064690351486206, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679577350616455, "step": 189, "train_speed(iter/s)": 0.018884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/mean_length": 450.6428527832031, "completions/min_length": 323.0, "entropy/max": 1.0078125, "entropy/mean": 0.3984375, "entropy/min": 0.2431640625, "epoch": 0.19, "grad_norm": 1.2392294157062458, "kl": 0.275390625, "learning_rate": 1.8412535328311812e-06, "loss": 0.002766430377960205, "memory(GiB)": 146.12, "reward": 1.594218373298645, "reward_std": 0.17641466856002808, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.208940327167511, "rewards/EvidenceHallucination/std": 0.34979137778282166, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 1.0135550498962402, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5524303913116455, "rewards/VideoAccuracy/std": 0.48600202798843384, "step": 190, "train_speed(iter/s)": 0.018867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/mean_length": 619.3333740234375, "completions/min_length": 386.0, "entropy/max": 0.65234375, "entropy/mean": 0.357421875, "entropy/min": 0.12109375, "epoch": 0.191, "grad_norm": 1.1769789565441817, "kl": 0.1806640625, "learning_rate": 1.8395336704882047e-06, "loss": 0.0018596196314319968, "memory(GiB)": 146.12, "reward": 2.0896453857421875, "reward_std": 0.23886415362358093, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48446333408355713, "rewards/EvidenceHallucination/std": 0.3222953677177429, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.494861364364624, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.7142857313156128, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.8498954772949219, "rewards/VideoAccuracy/std": 0.36691805720329285, "step": 191, "train_speed(iter/s)": 0.018856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1011.0, "completions/mean_length": 551.2619018554688, "completions/min_length": 286.0, "entropy/max": 0.625, "entropy/mean": 0.42578125, "entropy/min": 0.240234375, "epoch": 0.192, "grad_norm": 0.8012267253059627, "kl": 0.265625, "learning_rate": 1.8378053540504871e-06, "loss": 0.0027068699710071087, "memory(GiB)": 146.12, "reward": 1.3803496360778809, "reward_std": 0.007227580063045025, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2350817620754242, "rewards/EvidenceHallucination/std": 0.3490041494369507, "rewards/Evidence_Num_Record/mean": 5.023809432983398, "rewards/Evidence_Num_Record/std": 1.918944001197815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 192, "train_speed(iter/s)": 0.018883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 506.0, "completions/min_length": 308.0, "entropy/max": 0.7578125, "entropy/mean": 0.40234375, "entropy/min": 0.058349609375, "epoch": 0.193, "grad_norm": 0.6008917441507103, "kl": 0.271484375, "learning_rate": 1.8360686009221558e-06, "loss": 0.002894636942073703, "memory(GiB)": 146.12, "reward": 1.1222654581069946, "reward_std": 0.10150207579135895, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.10451966524124146, "rewards/EvidenceHallucination/std": 0.28947871923446655, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.2337208986282349, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.11326626688241959, "rewards/VideoAccuracy/std": 0.287361741065979, "step": 193, "train_speed(iter/s)": 0.01886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1009.0, "completions/mean_length": 521.357177734375, "completions/min_length": 284.0, "entropy/max": 0.56640625, "entropy/mean": 0.373046875, "entropy/min": 0.1318359375, "epoch": 0.194, "grad_norm": 1.2395668477965114, "kl": 0.25, "learning_rate": 1.8343234285922952e-06, "loss": 0.0025251524057239294, "memory(GiB)": 146.12, "reward": 1.9593502283096313, "reward_std": 0.19612763822078705, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42780205607414246, "rewards/EvidenceHallucination/std": 0.3869655430316925, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.5897464752197266, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5476190447807312, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.7642660737037659, "rewards/VideoAccuracy/std": 0.501839816570282, "step": 194, "train_speed(iter/s)": 0.018881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/mean_length": 682.5, "completions/min_length": 403.0, "entropy/max": 0.7109375, "entropy/mean": 0.349609375, "entropy/min": 0.1513671875, "epoch": 0.195, "grad_norm": 1.0347054760255026, "kl": 0.19140625, "learning_rate": 1.832569854634771e-06, "loss": 0.001986590214073658, "memory(GiB)": 146.12, "reward": 1.6080275774002075, "reward_std": 0.26118576526641846, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30487632751464844, "rewards/EvidenceHallucination/std": 0.3554747700691223, "rewards/Evidence_Num_Record/mean": 5.738095283508301, "rewards/Evidence_Num_Record/std": 2.4099340438842773, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.48038557171821594, "rewards/VideoAccuracy/std": 0.43697768449783325, "step": 195, "train_speed(iter/s)": 0.018862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 746.0, "completions/mean_length": 467.66668701171875, "completions/min_length": 281.0, "entropy/max": 0.953125, "entropy/mean": 0.376953125, "entropy/min": 0.197265625, "epoch": 0.196, "grad_norm": 1.2986731761415105, "kl": 0.298828125, "learning_rate": 1.8308078967080545e-06, "loss": 0.003042693017050624, "memory(GiB)": 146.12, "reward": 1.410247802734375, "reward_std": 0.33110353350639343, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27933555841445923, "rewards/EvidenceHallucination/std": 0.38819193840026855, "rewards/Evidence_Num_Record/mean": 4.833333492279053, "rewards/Evidence_Num_Record/std": 1.2477622032165527, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.35438072681427, "rewards/VideoAccuracy/std": 0.4773140847682953, "step": 196, "train_speed(iter/s)": 0.018854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 763.0, "completions/mean_length": 456.40478515625, "completions/min_length": 303.0, "entropy/max": 1.015625, "entropy/mean": 0.423828125, "entropy/min": 0.208984375, "epoch": 0.197, "grad_norm": 1.3153179989585684, "kl": 0.2890625, "learning_rate": 1.8290375725550415e-06, "loss": 0.00291400752030313, "memory(GiB)": 146.12, "reward": 1.5776203870773315, "reward_std": 0.3823457956314087, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29908379912376404, "rewards/EvidenceHallucination/std": 0.384405255317688, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.2003192901611328, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.46790117025375366, "rewards/VideoAccuracy/mean": 0.4558989405632019, "rewards/VideoAccuracy/std": 0.5101503729820251, "step": 197, "train_speed(iter/s)": 0.018864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 875.0, "completions/mean_length": 540.5952758789062, "completions/min_length": 364.0, "entropy/max": 1.6953125, "entropy/mean": 0.435546875, "entropy/min": 0.138671875, "epoch": 0.198, "grad_norm": 1.1143950762270292, "kl": 0.203125, "learning_rate": 1.827258900002877e-06, "loss": 0.0020716446451842785, "memory(GiB)": 146.12, "reward": 2.1092474460601807, "reward_std": 0.19651727378368378, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42437508702278137, "rewards/EvidenceHallucination/std": 0.40561822056770325, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.09480881690979, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5952380895614624, "rewards/HonestTime/std": 0.49679574370384216, "rewards/VideoAccuracy/mean": 0.9053246974945068, "rewards/VideoAccuracy/std": 0.4163197875022888, "step": 198, "train_speed(iter/s)": 0.018861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/mean_length": 471.71429443359375, "completions/min_length": 322.0, "entropy/max": 0.75390625, "entropy/mean": 0.390625, "entropy/min": 0.279296875, "epoch": 0.199, "grad_norm": 1.1737449138228981, "kl": 0.291015625, "learning_rate": 1.825471896962774e-06, "loss": 0.0029436303302645683, "memory(GiB)": 146.12, "reward": 1.9432400465011597, "reward_std": 0.023910468444228172, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5495333075523376, "rewards/EvidenceHallucination/std": 0.30792468786239624, "rewards/Evidence_Num_Record/mean": 4.547619342803955, "rewards/Evidence_Num_Record/std": 1.06387197971344, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8333333730697632, "rewards/VideoAccuracy/std": 0.37719547748565674, "step": 199, "train_speed(iter/s)": 0.018846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 575.0, "completions/mean_length": 398.6190490722656, "completions/min_length": 303.0, "entropy/max": 0.67578125, "entropy/mean": 0.37109375, "entropy/min": 0.2041015625, "epoch": 0.2, "grad_norm": 1.2972484508452815, "kl": 0.306640625, "learning_rate": 1.8236765814298327e-06, "loss": 0.00309559958986938, "memory(GiB)": 146.12, "reward": 1.3546700477600098, "reward_std": 0.40779995918273926, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24729494750499725, "rewards/EvidenceHallucination/std": 0.3658376932144165, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7213357090950012, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.30521106719970703, "rewards/VideoAccuracy/std": 0.4353443384170532, "step": 200, "train_speed(iter/s)": 0.018843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1003.0, "completions/mean_length": 479.4761962890625, "completions/min_length": 324.0, "entropy/max": 0.65625, "entropy/mean": 0.349609375, "entropy/min": 0.095703125, "epoch": 0.201, "grad_norm": 1.1284051039205776, "kl": 0.20703125, "learning_rate": 1.821872971482861e-06, "loss": 0.002093291375786066, "memory(GiB)": 146.12, "reward": 1.8066730499267578, "reward_std": 0.13066262006759644, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3956207036972046, "rewards/EvidenceHallucination/std": 0.37852877378463745, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.6559829115867615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.5323108434677124, "rewards/VideoAccuracy/std": 0.35995954275131226, "step": 201, "train_speed(iter/s)": 0.018768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 819.0, "completions/mean_length": 474.6190490722656, "completions/min_length": 293.0, "entropy/max": 1.28125, "entropy/mean": 0.51953125, "entropy/min": 0.30859375, "epoch": 0.202, "grad_norm": 1.3102689029487984, "kl": 0.294921875, "learning_rate": 1.8200610852841911e-06, "loss": 0.0029544036369770765, "memory(GiB)": 146.12, "reward": 1.6257050037384033, "reward_std": 0.10190405696630478, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.390429824590683, "rewards/EvidenceHallucination/std": 0.378105491399765, "rewards/Evidence_Num_Record/mean": 4.833333492279053, "rewards/Evidence_Num_Record/std": 1.5759884119033813, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5476190447807312, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 202, "train_speed(iter/s)": 0.018772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 509.0, "completions/mean_length": 394.952392578125, "completions/min_length": 252.0, "entropy/max": 0.6171875, "entropy/mean": 0.4140625, "entropy/min": 0.306640625, "epoch": 0.203, "grad_norm": 1.410439577448358, "kl": 0.2890625, "learning_rate": 1.8182409410794966e-06, "loss": 0.0028976770117878914, "memory(GiB)": 146.12, "reward": 1.54341459274292, "reward_std": 0.264932245016098, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31481143832206726, "rewards/EvidenceHallucination/std": 0.36222589015960693, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.5942034721374512, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4804523289203644, "rewards/VideoAccuracy/std": 0.4593258798122406, "step": 203, "train_speed(iter/s)": 0.018778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 446.66668701171875, "completions/min_length": 242.0, "entropy/max": 1.125, "entropy/mean": 0.39453125, "entropy/min": 0.049560546875, "epoch": 0.204, "grad_norm": 1.333677043718328, "kl": 0.259765625, "learning_rate": 1.8164125571976096e-06, "loss": 0.002771096769720316, "memory(GiB)": 146.12, "reward": 1.766880989074707, "reward_std": 0.2664657533168793, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3919958472251892, "rewards/EvidenceHallucination/std": 0.3605327010154724, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.8039536476135254, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.5714285969734192, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.5861007571220398, "rewards/VideoAccuracy/std": 0.47626814246177673, "step": 204, "train_speed(iter/s)": 0.018727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 784.0, "completions/mean_length": 503.5238037109375, "completions/min_length": 311.0, "entropy/max": 0.76171875, "entropy/mean": 0.39453125, "entropy/min": 0.11669921875, "epoch": 0.205, "grad_norm": 1.2386194810672908, "kl": 0.22265625, "learning_rate": 1.8145759520503357e-06, "loss": 0.0022671599872410297, "memory(GiB)": 146.12, "reward": 2.2045955657958984, "reward_std": 0.07308042049407959, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6259084343910217, "rewards/EvidenceHallucination/std": 0.24700582027435303, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.0581248998641968, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 1.0175089836120605, "rewards/VideoAccuracy/std": 0.12256230413913727, "step": 205, "train_speed(iter/s)": 0.01874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 582.0, "completions/mean_length": 379.952392578125, "completions/min_length": 211.0, "entropy/max": 0.8984375, "entropy/mean": 0.388671875, "entropy/min": 0.24609375, "epoch": 0.206, "grad_norm": 1.027901011642713, "kl": 0.2890625, "learning_rate": 1.812731144132268e-06, "loss": 0.0029317340813577175, "memory(GiB)": 146.12, "reward": 1.563454031944275, "reward_std": 0.01673251762986183, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3172701895236969, "rewards/EvidenceHallucination/std": 0.3400120735168457, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 1.000870704650879, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5, "rewards/VideoAccuracy/std": 0.5060608386993408, "step": 206, "train_speed(iter/s)": 0.018738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 398.16668701171875, "completions/min_length": 237.0, "entropy/max": 0.5625, "entropy/mean": 0.41796875, "entropy/min": 0.22265625, "epoch": 0.207, "grad_norm": 1.4431055373612067, "kl": 0.291015625, "learning_rate": 1.8108781520206018e-06, "loss": 0.0029396088793873787, "memory(GiB)": 146.12, "reward": 1.3641424179077148, "reward_std": 0.3134014308452606, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18790483474731445, "rewards/EvidenceHallucination/std": 0.3463141620159149, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8502919673919678, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.46790117025375366, "rewards/VideoAccuracy/mean": 0.2646566331386566, "rewards/VideoAccuracy/std": 0.4310843050479889, "step": 207, "train_speed(iter/s)": 0.018751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 783.0, "completions/mean_length": 452.6190490722656, "completions/min_length": 304.0, "entropy/max": 0.84375, "entropy/mean": 0.345703125, "entropy/min": 0.1494140625, "epoch": 0.208, "grad_norm": 0.9803073502002945, "kl": 0.2060546875, "learning_rate": 1.8090169943749474e-06, "loss": 0.0020949551835656166, "memory(GiB)": 146.12, "reward": 1.8597743511199951, "reward_std": 0.1580304503440857, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43828946352005005, "rewards/EvidenceHallucination/std": 0.35627278685569763, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.7593780159950256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.6435449719429016, "rewards/VideoAccuracy/std": 0.5075361132621765, "step": 208, "train_speed(iter/s)": 0.018752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 615.0, "completions/mean_length": 419.3333435058594, "completions/min_length": 227.0, "entropy/max": 0.953125, "entropy/mean": 0.447265625, "entropy/min": 0.28125, "epoch": 0.209, "grad_norm": 0.8107368307761447, "kl": 0.291015625, "learning_rate": 1.8071476899371413e-06, "loss": 0.0029117565136402845, "memory(GiB)": 146.12, "reward": 1.3520643711090088, "reward_std": 0.07225346565246582, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21270343661308289, "rewards/EvidenceHallucination/std": 0.3311234712600708, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9997096061706543, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 209, "train_speed(iter/s)": 0.018737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 389.952392578125, "completions/min_length": 297.0, "entropy/max": 0.60546875, "entropy/mean": 0.42578125, "entropy/min": 0.26171875, "epoch": 0.21, "grad_norm": 1.3087588124307798, "kl": 0.296875, "learning_rate": 1.8052702575310586e-06, "loss": 0.002977391704916954, "memory(GiB)": 146.12, "reward": 1.514713168144226, "reward_std": 0.31183916330337524, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34595897793769836, "rewards/EvidenceHallucination/std": 0.39453673362731934, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.6270147562026978, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4455213248729706, "rewards/VideoAccuracy/std": 0.4912121891975403, "step": 210, "train_speed(iter/s)": 0.018747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 945.0, "completions/mean_length": 507.5952453613281, "completions/min_length": 297.0, "entropy/max": 0.81640625, "entropy/mean": 0.361328125, "entropy/min": 0.1650390625, "epoch": 0.211, "grad_norm": 1.2637694787988318, "kl": 0.2041015625, "learning_rate": 1.8033847160624225e-06, "loss": 0.0020564354490488768, "memory(GiB)": 146.12, "reward": 2.2377514839172363, "reward_std": 0.20628312230110168, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40893810987472534, "rewards/EvidenceHallucination/std": 0.31318873167037964, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9559638500213623, "rewards/VideoAccuracy/std": 0.4015641510486603, "step": 211, "train_speed(iter/s)": 0.018744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/mean_length": 474.71429443359375, "completions/min_length": 273.0, "entropy/max": 1.171875, "entropy/mean": 0.474609375, "entropy/min": 0.1923828125, "epoch": 0.212, "grad_norm": 0.9626542864618389, "kl": 0.28515625, "learning_rate": 1.801491084518615e-06, "loss": 0.0029152999632060528, "memory(GiB)": 146.12, "reward": 1.4065104722976685, "reward_std": 0.07577715069055557, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24683839082717896, "rewards/EvidenceHallucination/std": 0.34258249402046204, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 2.2638142108917236, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496562242507935, "step": 212, "train_speed(iter/s)": 0.018742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 723.0, "completions/mean_length": 386.3333435058594, "completions/min_length": 282.0, "entropy/max": 0.8359375, "entropy/mean": 0.41796875, "entropy/min": 0.220703125, "epoch": 0.213, "grad_norm": 1.3631479721595794, "kl": 0.3203125, "learning_rate": 1.7995893819684848e-06, "loss": 0.003235860262066126, "memory(GiB)": 146.12, "reward": 1.5252161026000977, "reward_std": 0.17157170176506042, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3588860034942627, "rewards/EvidenceHallucination/std": 0.4134328067302704, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.8968262076377869, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4534388482570648, "rewards/VideoAccuracy/std": 0.4595082998275757, "step": 213, "train_speed(iter/s)": 0.018735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/mean_length": 408.71429443359375, "completions/min_length": 225.0, "entropy/max": 0.55859375, "entropy/mean": 0.35546875, "entropy/min": 0.11865234375, "epoch": 0.214, "grad_norm": 1.1554583596717818, "kl": 0.28515625, "learning_rate": 1.7976796275621553e-06, "loss": 0.0030635735020041466, "memory(GiB)": 146.12, "reward": 1.7635283470153809, "reward_std": 0.20307080447673798, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35455483198165894, "rewards/EvidenceHallucination/std": 0.3721371293067932, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.7730206847190857, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.5688078999519348, "rewards/VideoAccuracy/std": 0.5657344460487366, "step": 214, "train_speed(iter/s)": 0.018742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 931.0, "completions/mean_length": 519.6428833007812, "completions/min_length": 308.0, "entropy/max": 0.92578125, "entropy/mean": 0.41015625, "entropy/min": 0.142578125, "epoch": 0.215, "grad_norm": 0.7731173289573026, "kl": 0.2353515625, "learning_rate": 1.795761840530832e-06, "loss": 0.002380756661295891, "memory(GiB)": 146.12, "reward": 1.489245891571045, "reward_std": 0.04518473148345947, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2751462161540985, "rewards/EvidenceHallucination/std": 0.3985624313354492, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.1526870727539062, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.367549866437912, "rewards/VideoAccuracy/std": 0.47184836864471436, "step": 215, "train_speed(iter/s)": 0.01874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/mean_length": 389.0714416503906, "completions/min_length": 298.0, "entropy/max": 0.515625, "entropy/mean": 0.421875, "entropy/min": 0.265625, "epoch": 0.216, "grad_norm": 1.1110957828022496, "kl": 0.330078125, "learning_rate": 1.7938360401866094e-06, "loss": 0.0033316153567284346, "memory(GiB)": 146.12, "reward": 1.5227714776992798, "reward_std": 0.148939311504364, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35195261240005493, "rewards/EvidenceHallucination/std": 0.3986785113811493, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.8594626188278198, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4523809552192688, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 216, "train_speed(iter/s)": 0.018743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 670.0, "completions/mean_length": 392.4761962890625, "completions/min_length": 282.0, "entropy/max": 0.82421875, "entropy/mean": 0.4609375, "entropy/min": 0.302734375, "epoch": 0.217, "grad_norm": 0.978112983100945, "kl": 0.314453125, "learning_rate": 1.791902245922275e-06, "loss": 0.0031728388275951147, "memory(GiB)": 146.12, "reward": 1.2725377082824707, "reward_std": 0.15308094024658203, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.12878981232643127, "rewards/EvidenceHallucination/std": 0.2983783185482025, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.8570944666862488, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.46790117025375366, "rewards/VideoAccuracy/mean": 0.18487496674060822, "rewards/VideoAccuracy/std": 0.4087332785129547, "step": 217, "train_speed(iter/s)": 0.018757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 791.0, "completions/mean_length": 538.7142944335938, "completions/min_length": 359.0, "entropy/max": 0.890625, "entropy/mean": 0.376953125, "entropy/min": 0.1630859375, "epoch": 0.218, "grad_norm": 1.1056095534057395, "kl": 0.2060546875, "learning_rate": 1.789960477211116e-06, "loss": 0.0020709068048745394, "memory(GiB)": 146.12, "reward": 1.8567214012145996, "reward_std": 0.18893079459667206, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31860214471817017, "rewards/EvidenceHallucination/std": 0.3428250849246979, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 0.9856696724891663, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074110031128, "rewards/VideoAccuracy/mean": 0.6691913604736328, "rewards/VideoAccuracy/std": 0.4289722144603729, "step": 218, "train_speed(iter/s)": 0.018745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 617.0, "completions/mean_length": 412.3809509277344, "completions/min_length": 282.0, "entropy/max": 1.2890625, "entropy/mean": 0.51953125, "entropy/min": 0.291015625, "epoch": 0.219, "grad_norm": 1.4397652540632981, "kl": 0.330078125, "learning_rate": 1.7880107536067217e-06, "loss": 0.0033206443767994642, "memory(GiB)": 146.12, "reward": 1.8000737428665161, "reward_std": 0.2137765884399414, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5479874014854431, "rewards/EvidenceHallucination/std": 0.3887883722782135, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9997095465660095, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6904761791229248, "rewards/VideoAccuracy/std": 0.4679011106491089, "step": 219, "train_speed(iter/s)": 0.018754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 439.0, "completions/mean_length": 357.7857360839844, "completions/min_length": 253.0, "entropy/max": 0.80859375, "entropy/mean": 0.462890625, "entropy/min": 0.333984375, "epoch": 0.22, "grad_norm": 0.9903506450991835, "kl": 0.330078125, "learning_rate": 1.7860530947427874e-06, "loss": 0.0032826876267790794, "memory(GiB)": 146.12, "reward": 1.127616047859192, "reward_std": 0.21693232655525208, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.07094395905733109, "rewards/EvidenceHallucination/std": 0.22947509586811066, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.6502032279968262, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.11342722177505493, "rewards/VideoAccuracy/std": 0.28198370337486267, "step": 220, "train_speed(iter/s)": 0.018789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 684.0, "completions/mean_length": 436.4285888671875, "completions/min_length": 331.0, "entropy/max": 0.671875, "entropy/mean": 0.3359375, "entropy/min": 0.1513671875, "epoch": 0.221, "grad_norm": 1.197858218367104, "kl": 0.2138671875, "learning_rate": 1.7840875203329158e-06, "loss": 0.002129746600985527, "memory(GiB)": 146.12, "reward": 2.172039747238159, "reward_std": 0.12870250642299652, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4757545590400696, "rewards/EvidenceHallucination/std": 0.35233354568481445, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.6921662092208862, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8816505670547485, "rewards/VideoAccuracy/std": 0.38467472791671753, "step": 221, "train_speed(iter/s)": 0.018781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/mean_length": 442.66668701171875, "completions/min_length": 281.0, "entropy/max": 1.2890625, "entropy/mean": 0.490234375, "entropy/min": 0.255859375, "epoch": 0.222, "grad_norm": 1.1591322108226676, "kl": 0.3125, "learning_rate": 1.7821140501704192e-06, "loss": 0.0031443522311747074, "memory(GiB)": 146.12, "reward": 1.5666881799697876, "reward_std": 0.1563645899295807, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33344078063964844, "rewards/EvidenceHallucination/std": 0.3708530366420746, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.2230842113494873, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5, "rewards/VideoAccuracy/std": 0.506060779094696, "step": 222, "train_speed(iter/s)": 0.018755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 628.0, "completions/mean_length": 379.3809509277344, "completions/min_length": 228.0, "entropy/max": 1.0625, "entropy/mean": 0.458984375, "entropy/min": 0.26953125, "epoch": 0.223, "grad_norm": 1.1508011621624752, "kl": 0.353515625, "learning_rate": 1.7801327041281207e-06, "loss": 0.003560734912753105, "memory(GiB)": 146.12, "reward": 1.4059683084487915, "reward_std": 0.232594296336174, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2645784020423889, "rewards/EvidenceHallucination/std": 0.3644518256187439, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.7265497446060181, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3530525863170624, "rewards/VideoAccuracy/std": 0.46768462657928467, "step": 223, "train_speed(iter/s)": 0.018755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 634.0, "completions/mean_length": 371.21429443359375, "completions/min_length": 238.0, "entropy/max": 0.84765625, "entropy/mean": 0.416015625, "entropy/min": 0.12353515625, "epoch": 0.224, "grad_norm": 1.2817648932774737, "kl": 0.302734375, "learning_rate": 1.7781435021581525e-06, "loss": 0.003047728445380926, "memory(GiB)": 146.12, "reward": 1.99153470993042, "reward_std": 0.16718348860740662, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5285584926605225, "rewards/EvidenceHallucination/std": 0.41436928510665894, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.8281487822532654, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.7572515606880188, "rewards/VideoAccuracy/std": 0.5330286622047424, "step": 224, "train_speed(iter/s)": 0.018762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 684.0, "completions/mean_length": 475.952392578125, "completions/min_length": 315.0, "entropy/max": 0.6875, "entropy/mean": 0.396484375, "entropy/min": 0.12353515625, "epoch": 0.225, "grad_norm": 1.1873840762665278, "kl": 0.2373046875, "learning_rate": 1.7761464642917567e-06, "loss": 0.0024001363199204206, "memory(GiB)": 146.12, "reward": 1.6954164505004883, "reward_std": 0.313042014837265, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4449019730091095, "rewards/EvidenceHallucination/std": 0.4287704825401306, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.2308934926986694, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5397694706916809, "rewards/VideoAccuracy/std": 0.4395271837711334, "step": 225, "train_speed(iter/s)": 0.018754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 434.26190185546875, "completions/min_length": 307.0, "entropy/max": 2.375, "entropy/mean": 0.58984375, "entropy/min": 0.1044921875, "epoch": 0.226, "grad_norm": 1.373347458456585, "kl": 0.333984375, "learning_rate": 1.7741416106390826e-06, "loss": 0.0035560843534767628, "memory(GiB)": 146.12, "reward": 1.257364273071289, "reward_std": 0.19904689490795135, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20269151031970978, "rewards/EvidenceHallucination/std": 0.36894580721855164, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.6115421056747437, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.240635484457016, "rewards/VideoAccuracy/std": 0.4220869541168213, "step": 226, "train_speed(iter/s)": 0.018727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07142857142857142, "completions/max_length": 2625.0, "completions/mean_length": 534.2142944335938, "completions/min_length": 309.0, "entropy/max": 0.76171875, "entropy/mean": 0.376953125, "entropy/min": 0.04052734375, "epoch": 0.227, "grad_norm": 1.2193637822754348, "kl": 0.306640625, "learning_rate": 1.7721289613889834e-06, "loss": 0.003318554488942027, "memory(GiB)": 146.12, "reward": 1.5782241821289062, "reward_std": 0.22096966207027435, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2946185767650604, "rewards/EvidenceHallucination/std": 0.3401840925216675, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 0.9047619104385376, "rewards/Format/std": 0.297101765871048, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5002529621124268, "rewards/VideoAccuracy/std": 0.5846176743507385, "step": 227, "train_speed(iter/s)": 0.018701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 723.0, "completions/mean_length": 497.9285888671875, "completions/min_length": 328.0, "entropy/max": 1.2265625, "entropy/mean": 0.380859375, "entropy/min": 0.140625, "epoch": 0.228, "grad_norm": 1.2673203843836767, "kl": 0.2080078125, "learning_rate": 1.7701085368088155e-06, "loss": 0.002099959645420313, "memory(GiB)": 146.12, "reward": 1.8202496767044067, "reward_std": 0.2765672206878662, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2655429244041443, "rewards/EvidenceHallucination/std": 0.370466411113739, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 0.8035923838615417, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6338077187538147, "rewards/VideoAccuracy/std": 0.40736258029937744, "step": 228, "train_speed(iter/s)": 0.018703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/mean_length": 400.8571472167969, "completions/min_length": 269.0, "entropy/max": 0.75, "entropy/mean": 0.443359375, "entropy/min": 0.263671875, "epoch": 0.229, "grad_norm": 1.2111830048331047, "kl": 0.345703125, "learning_rate": 1.7680803572442319e-06, "loss": 0.003480810672044754, "memory(GiB)": 146.12, "reward": 1.5204336643218994, "reward_std": 0.1846160888671875, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3402631878852844, "rewards/EvidenceHallucination/std": 0.3967270851135254, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.8781777620315552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4523809552192688, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 229, "train_speed(iter/s)": 0.018701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 372.76190185546875, "completions/min_length": 284.0, "entropy/max": 0.62109375, "entropy/mean": 0.4140625, "entropy/min": 0.2578125, "epoch": 0.23, "grad_norm": 1.0512029932846734, "kl": 0.337890625, "learning_rate": 1.766044443118978e-06, "loss": 0.0034035081043839455, "memory(GiB)": 146.12, "reward": 1.1633344888687134, "reward_std": 0.17522874474525452, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.09813331812620163, "rewards/EvidenceHallucination/std": 0.25174546241760254, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.532345175743103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.1437077820301056, "rewards/VideoAccuracy/std": 0.34388238191604614, "step": 230, "train_speed(iter/s)": 0.018737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 691.0, "completions/mean_length": 464.1190490722656, "completions/min_length": 252.0, "entropy/max": 0.439453125, "entropy/mean": 0.30078125, "entropy/min": 0.15234375, "epoch": 0.231, "grad_norm": 1.1926249390501715, "kl": 0.2158203125, "learning_rate": 1.7640008149346866e-06, "loss": 0.0021739723160862923, "memory(GiB)": 146.12, "reward": 2.4249696731567383, "reward_std": 0.18014255166053772, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5357918739318848, "rewards/EvidenceHallucination/std": 0.38135936856269836, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.7593780755996704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 1.1273351907730103, "rewards/VideoAccuracy/std": 0.4719649851322174, "step": 231, "train_speed(iter/s)": 0.018719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 924.0, "completions/mean_length": 440.5, "completions/min_length": 314.0, "entropy/max": 1.609375, "entropy/mean": 0.52734375, "entropy/min": 0.2734375, "epoch": 0.232, "grad_norm": 1.046542137226126, "kl": 0.314453125, "learning_rate": 1.761949493270671e-06, "loss": 0.0032124067656695843, "memory(GiB)": 146.12, "reward": 1.3601796627044678, "reward_std": 0.2623414993286133, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2532789707183838, "rewards/EvidenceHallucination/std": 0.392877995967865, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 0.8430904150009155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011106491089, "step": 232, "train_speed(iter/s)": 0.018726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 420.0, "completions/min_length": 208.0, "entropy/max": 0.796875, "entropy/mean": 0.458984375, "entropy/min": 0.05419921875, "epoch": 0.233, "grad_norm": 1.121574397959357, "kl": 0.314453125, "learning_rate": 1.759890498783717e-06, "loss": 0.003404829418286681, "memory(GiB)": 146.12, "reward": 1.3430793285369873, "reward_std": 0.20950570702552795, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22730125486850739, "rewards/EvidenceHallucination/std": 0.37339580059051514, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.7948732376098633, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 233, "train_speed(iter/s)": 0.018683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 535.0, "completions/mean_length": 393.23809814453125, "completions/min_length": 316.0, "entropy/max": 0.50390625, "entropy/mean": 0.38671875, "entropy/min": 0.255859375, "epoch": 0.234, "grad_norm": 1.4538794911698656, "kl": 0.322265625, "learning_rate": 1.7578238522078768e-06, "loss": 0.0032620998099446297, "memory(GiB)": 146.12, "reward": 1.8541812896728516, "reward_std": 0.2292259782552719, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3325042128562927, "rewards/EvidenceHallucination/std": 0.40219464898109436, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.5902813673019409, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6876804232597351, "rewards/VideoAccuracy/std": 0.4814368188381195, "step": 234, "train_speed(iter/s)": 0.01869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 611.0, "completions/mean_length": 427.19049072265625, "completions/min_length": 339.0, "entropy/max": 0.97265625, "entropy/mean": 0.44921875, "entropy/min": 0.1865234375, "epoch": 0.235, "grad_norm": 1.1149482972182843, "kl": 0.255859375, "learning_rate": 1.7557495743542582e-06, "loss": 0.0025807444471865892, "memory(GiB)": 146.12, "reward": 1.8418928384780884, "reward_std": 0.12823235988616943, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5464540123939514, "rewards/EvidenceHallucination/std": 0.4399447441101074, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.9323829412460327, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6659352779388428, "rewards/VideoAccuracy/std": 0.5348252654075623, "step": 235, "train_speed(iter/s)": 0.018687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.0, "completions/mean_length": 341.6190490722656, "completions/min_length": 239.0, "entropy/max": 0.6015625, "entropy/mean": 0.392578125, "entropy/min": 0.27734375, "epoch": 0.236, "grad_norm": 1.5263174226155418, "kl": 0.34375, "learning_rate": 1.7536676861108164e-06, "loss": 0.0034528947435319424, "memory(GiB)": 146.12, "reward": 1.6555285453796387, "reward_std": 0.21378569304943085, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.435823529958725, "rewards/EvidenceHallucination/std": 0.41382157802581787, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.607731819152832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5683638453483582, "rewards/VideoAccuracy/std": 0.4869895875453949, "step": 236, "train_speed(iter/s)": 0.018676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/mean_length": 373.9285888671875, "completions/min_length": 263.0, "entropy/max": 0.734375, "entropy/mean": 0.40234375, "entropy/min": 0.2265625, "epoch": 0.237, "grad_norm": 1.2119485357062756, "kl": 0.33203125, "learning_rate": 1.7515782084421423e-06, "loss": 0.0033400084357708693, "memory(GiB)": 146.12, "reward": 1.6784098148345947, "reward_std": 0.1474025994539261, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31832629442214966, "rewards/EvidenceHallucination/std": 0.3810774087905884, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.4371005594730377, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.5671253204345703, "rewards/VideoAccuracy/std": 0.5303839445114136, "step": 237, "train_speed(iter/s)": 0.018691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1054.0, "completions/mean_length": 481.7857360839844, "completions/min_length": 320.0, "entropy/max": 1.2890625, "entropy/mean": 0.40234375, "entropy/min": 0.1484375, "epoch": 0.238, "grad_norm": 1.307395098001326, "kl": 0.23828125, "learning_rate": 1.749481162389254e-06, "loss": 0.0023901357781141996, "memory(GiB)": 146.12, "reward": 2.0823862552642822, "reward_std": 0.25942322611808777, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3701555132865906, "rewards/EvidenceHallucination/std": 0.4010622501373291, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.916046142578125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8750216364860535, "rewards/VideoAccuracy/std": 0.4637349247932434, "step": 238, "train_speed(iter/s)": 0.018631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 596.0, "completions/mean_length": 398.21429443359375, "completions/min_length": 287.0, "entropy/max": 1.046875, "entropy/mean": 0.4609375, "entropy/min": 0.2373046875, "epoch": 0.239, "grad_norm": 1.3285319130495021, "kl": 0.326171875, "learning_rate": 1.747376569069381e-06, "loss": 0.0033058568369597197, "memory(GiB)": 146.12, "reward": 1.411938190460205, "reward_std": 0.4043456017971039, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27397623658180237, "rewards/EvidenceHallucination/std": 0.37728753685951233, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.6717287302017212, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496562242507935, "step": 239, "train_speed(iter/s)": 0.018651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 463.0, "completions/mean_length": 364.1428527832031, "completions/min_length": 291.0, "entropy/max": 0.49609375, "entropy/mean": 0.37890625, "entropy/min": 0.26953125, "epoch": 0.24, "grad_norm": 1.1106519834910449, "kl": 0.318359375, "learning_rate": 1.7452644496757548e-06, "loss": 0.00319494167342782, "memory(GiB)": 146.12, "reward": 1.3215675354003906, "reward_std": 0.18073152005672455, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23566341400146484, "rewards/EvidenceHallucination/std": 0.37131911516189575, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.4843665361404419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2744348645210266, "rewards/VideoAccuracy/std": 0.408074289560318, "step": 240, "train_speed(iter/s)": 0.018586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 825.0, "completions/mean_length": 446.66668701171875, "completions/min_length": 295.0, "entropy/max": 0.46875, "entropy/mean": 0.3046875, "entropy/min": 0.126953125, "epoch": 0.241, "grad_norm": 1.1616853808362892, "kl": 0.2236328125, "learning_rate": 1.743144825477394e-06, "loss": 0.002262769965454936, "memory(GiB)": 146.12, "reward": 2.2303261756896973, "reward_std": 0.12588879466056824, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.51683509349823, "rewards/EvidenceHallucination/std": 0.3724346160888672, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.5768471360206604, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9269590973854065, "rewards/VideoAccuracy/std": 0.41551220417022705, "step": 241, "train_speed(iter/s)": 0.018587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 652.0, "completions/mean_length": 447.452392578125, "completions/min_length": 297.0, "entropy/max": 1.734375, "entropy/mean": 0.5546875, "entropy/min": 0.271484375, "epoch": 0.242, "grad_norm": 1.2606325767889839, "kl": 0.314453125, "learning_rate": 1.7410177178188917e-06, "loss": 0.00319875031709671, "memory(GiB)": 146.12, "reward": 1.6765947341918945, "reward_std": 0.11555620282888412, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40678247809410095, "rewards/EvidenceHallucination/std": 0.35523679852485657, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.2653241157531738, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 242, "train_speed(iter/s)": 0.018588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/mean_length": 387.4761962890625, "completions/min_length": 297.0, "entropy/max": 0.70703125, "entropy/mean": 0.427734375, "entropy/min": 0.30859375, "epoch": 0.243, "grad_norm": 1.146758509584447, "kl": 0.326171875, "learning_rate": 1.7388831481201976e-06, "loss": 0.0032735601998865604, "memory(GiB)": 146.12, "reward": 1.2293654680252075, "reward_std": 0.21915750205516815, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.1501440852880478, "rewards/EvidenceHallucination/std": 0.3173399567604065, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.8499504923820496, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2112414836883545, "rewards/VideoAccuracy/std": 0.3915502429008484, "step": 243, "train_speed(iter/s)": 0.018593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/mean_length": 389.6190490722656, "completions/min_length": 266.0, "entropy/max": 0.52734375, "entropy/mean": 0.369140625, "entropy/min": 0.1630859375, "epoch": 0.244, "grad_norm": 1.500520513978552, "kl": 0.298828125, "learning_rate": 1.7367411378764047e-06, "loss": 0.003024215577170253, "memory(GiB)": 146.12, "reward": 1.8303215503692627, "reward_std": 0.09470437467098236, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44220417737960815, "rewards/EvidenceHallucination/std": 0.4173884987831116, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.6172134280204773, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.6133091449737549, "rewards/VideoAccuracy/std": 0.4946582615375519, "step": 244, "train_speed(iter/s)": 0.018619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 812.0, "completions/mean_length": 507.0714416503906, "completions/min_length": 241.0, "entropy/max": 1.640625, "entropy/mean": 0.51953125, "entropy/min": 0.1669921875, "epoch": 0.245, "grad_norm": 0.9801385937878792, "kl": 0.2333984375, "learning_rate": 1.7345917086575331e-06, "loss": 0.0023636885453015566, "memory(GiB)": 146.12, "reward": 1.4090077877044678, "reward_std": 0.22807496786117554, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.17244532704353333, "rewards/EvidenceHallucination/std": 0.3078490197658539, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.8323455452919006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.31261393427848816, "rewards/VideoAccuracy/std": 0.3989967703819275, "step": 245, "train_speed(iter/s)": 0.018623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 540.0, "completions/mean_length": 390.4761962890625, "completions/min_length": 277.0, "entropy/max": 0.80859375, "entropy/mean": 0.4453125, "entropy/min": 0.291015625, "epoch": 0.246, "grad_norm": 1.1444176586472752, "kl": 0.341796875, "learning_rate": 1.7324348821083108e-06, "loss": 0.0034365360625088215, "memory(GiB)": 146.12, "reward": 1.5113457441329956, "reward_std": 0.17555874586105347, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33632904291152954, "rewards/EvidenceHallucination/std": 0.41669946908950806, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.5823577642440796, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4440799057483673, "rewards/VideoAccuracy/std": 0.4973483979701996, "step": 246, "train_speed(iter/s)": 0.018639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/mean_length": 400.4761962890625, "completions/min_length": 324.0, "entropy/max": 0.609375, "entropy/mean": 0.40234375, "entropy/min": 0.267578125, "epoch": 0.247, "grad_norm": 1.3956294467056896, "kl": 0.326171875, "learning_rate": 1.7302706799479574e-06, "loss": 0.0032703722827136517, "memory(GiB)": 146.12, "reward": 1.3936400413513184, "reward_std": 0.27235424518585205, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21763722598552704, "rewards/EvidenceHallucination/std": 0.3722839951515198, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.7034013867378235, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.28820788860321045, "rewards/VideoAccuracy/std": 0.41962555050849915, "step": 247, "train_speed(iter/s)": 0.018645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 986.0, "completions/mean_length": 482.90478515625, "completions/min_length": 321.0, "entropy/max": 0.66796875, "entropy/mean": 0.3984375, "entropy/min": 0.166015625, "epoch": 0.248, "grad_norm": 1.2035411174051378, "kl": 0.2216796875, "learning_rate": 1.728099123969964e-06, "loss": 0.002241044072434306, "memory(GiB)": 146.12, "reward": 1.8912005424499512, "reward_std": 0.19596272706985474, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3262063264846802, "rewards/EvidenceHallucination/std": 0.38895130157470703, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7860573530197144, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6973879337310791, "rewards/VideoAccuracy/std": 0.445387601852417, "step": 248, "train_speed(iter/s)": 0.018639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 503.0, "completions/mean_length": 404.5, "completions/min_length": 288.0, "entropy/max": 0.62109375, "entropy/mean": 0.48828125, "entropy/min": 0.3359375, "epoch": 0.249, "grad_norm": 1.1842889183539216, "kl": 0.322265625, "learning_rate": 1.725920236041876e-06, "loss": 0.003257274627685547, "memory(GiB)": 146.12, "reward": 1.3008848428726196, "reward_std": 0.2445618361234665, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1949000358581543, "rewards/EvidenceHallucination/std": 0.3388776481151581, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.6325473189353943, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.261904776096344, "rewards/VideoAccuracy/std": 0.44500064849853516, "step": 249, "train_speed(iter/s)": 0.018639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/mean_length": 397.5952453613281, "completions/min_length": 316.0, "entropy/max": 0.828125, "entropy/mean": 0.42578125, "entropy/min": 0.3125, "epoch": 0.25, "grad_norm": 1.3547388774578903, "kl": 0.322265625, "learning_rate": 1.72373403810507e-06, "loss": 0.0032412242144346237, "memory(GiB)": 146.12, "reward": 1.5807161331176758, "reward_std": 0.27251145243644714, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44168218970298767, "rewards/EvidenceHallucination/std": 0.4541189670562744, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.5436787009239197, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4923795461654663, "rewards/VideoAccuracy/std": 0.4577435851097107, "step": 250, "train_speed(iter/s)": 0.018646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 734.0, "completions/mean_length": 438.40478515625, "completions/min_length": 293.0, "entropy/max": 0.51171875, "entropy/mean": 0.35546875, "entropy/min": 0.166015625, "epoch": 0.251, "grad_norm": 1.307954720419532, "kl": 0.2412109375, "learning_rate": 1.7215405521745355e-06, "loss": 0.002425679238513112, "memory(GiB)": 146.12, "reward": 2.0063116550445557, "reward_std": 0.1874866485595703, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2503293454647064, "rewards/EvidenceHallucination/std": 0.34678173065185547, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.5702659487724304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7562455534934998, "rewards/VideoAccuracy/std": 0.4492575228214264, "step": 251, "train_speed(iter/s)": 0.018647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/mean_length": 421.452392578125, "completions/min_length": 291.0, "entropy/max": 1.421875, "entropy/mean": 0.578125, "entropy/min": 0.341796875, "epoch": 0.252, "grad_norm": 1.4208045132226126, "kl": 0.322265625, "learning_rate": 1.719339800338651e-06, "loss": 0.0032400963827967644, "memory(GiB)": 146.12, "reward": 1.7803417444229126, "reward_std": 0.2757423222064972, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4493274688720703, "rewards/EvidenceHallucination/std": 0.407497763633728, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.5625766515731812, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6904761791229248, "rewards/VideoAccuracy/std": 0.4679011404514313, "step": 252, "train_speed(iter/s)": 0.018641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.0, "completions/mean_length": 394.16668701171875, "completions/min_length": 273.0, "entropy/max": 0.640625, "entropy/mean": 0.451171875, "entropy/min": 0.326171875, "epoch": 0.253, "grad_norm": 1.2929304180915258, "kl": 0.333984375, "learning_rate": 1.7171318047589635e-06, "loss": 0.0033461390994489193, "memory(GiB)": 146.12, "reward": 1.3023897409439087, "reward_std": 0.1982208788394928, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1807071566581726, "rewards/EvidenceHallucination/std": 0.3331444561481476, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.6172134280204773, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2662482261657715, "rewards/VideoAccuracy/std": 0.4432688355445862, "step": 253, "train_speed(iter/s)": 0.018639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 502.0, "completions/mean_length": 377.6428527832031, "completions/min_length": 266.0, "entropy/max": 0.52734375, "entropy/mean": 0.37109375, "entropy/min": 0.1640625, "epoch": 0.254, "grad_norm": 1.3430050273404348, "kl": 0.322265625, "learning_rate": 1.7149165876699635e-06, "loss": 0.00322998920455575, "memory(GiB)": 146.12, "reward": 1.9686601161956787, "reward_std": 0.15321964025497437, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3936600387096405, "rewards/EvidenceHallucination/std": 0.41254332661628723, "rewards/Evidence_Num_Record/mean": 3.1190476417541504, "rewards/Evidence_Num_Record/std": 0.4527628421783447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.756594717502594, "rewards/VideoAccuracy/std": 0.4783211350440979, "step": 254, "train_speed(iter/s)": 0.018631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 841.0, "completions/mean_length": 483.0476379394531, "completions/min_length": 340.0, "entropy/max": 1.984375, "entropy/mean": 0.546875, "entropy/min": 0.1767578125, "epoch": 0.255, "grad_norm": 1.203906055474538, "kl": 0.2314453125, "learning_rate": 1.7126941713788629e-06, "loss": 0.00235724076628685, "memory(GiB)": 146.12, "reward": 1.7237396240234375, "reward_std": 0.3287803530693054, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2043936848640442, "rewards/EvidenceHallucination/std": 0.35046061873435974, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.5174088478088379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6161943078041077, "rewards/VideoAccuracy/std": 0.4976581335067749, "step": 255, "train_speed(iter/s)": 0.018634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 822.0, "completions/mean_length": 393.9761962890625, "completions/min_length": 288.0, "entropy/max": 0.6953125, "entropy/mean": 0.44140625, "entropy/min": 0.201171875, "epoch": 0.256, "grad_norm": 1.6810269204709323, "kl": 0.302734375, "learning_rate": 1.7104645782653689e-06, "loss": 0.00305356178432703, "memory(GiB)": 146.12, "reward": 1.8969032764434814, "reward_std": 0.14163285493850708, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5417539477348328, "rewards/EvidenceHallucination/std": 0.3971792161464691, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.6357524394989014, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.788552463054657, "rewards/VideoAccuracy/std": 0.4101748764514923, "step": 256, "train_speed(iter/s)": 0.018631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 386.69049072265625, "completions/min_length": 295.0, "entropy/max": 0.7578125, "entropy/mean": 0.4375, "entropy/min": 0.224609375, "epoch": 0.257, "grad_norm": 1.5110203668059161, "kl": 0.322265625, "learning_rate": 1.708227830781459e-06, "loss": 0.0032882890664041042, "memory(GiB)": 146.12, "reward": 1.7894835472106934, "reward_std": 0.3021777272224426, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3878067135810852, "rewards/EvidenceHallucination/std": 0.3837956190109253, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.41739192605018616, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6452555656433105, "rewards/VideoAccuracy/std": 0.49124613404273987, "step": 257, "train_speed(iter/s)": 0.01864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/mean_length": 432.3095397949219, "completions/min_length": 313.0, "entropy/max": 1.3046875, "entropy/mean": 0.4453125, "entropy/min": 0.19140625, "epoch": 0.258, "grad_norm": 1.2616484212255992, "kl": 0.2236328125, "learning_rate": 1.7059839514511562e-06, "loss": 0.0022375385742634535, "memory(GiB)": 146.12, "reward": 1.8460009098052979, "reward_std": 0.15329289436340332, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2919936180114746, "rewards/EvidenceHallucination/std": 0.37274080514907837, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.6325473189353943, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6542688012123108, "rewards/VideoAccuracy/std": 0.48837876319885254, "step": 258, "train_speed(iter/s)": 0.018641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 535.0, "completions/mean_length": 377.1428527832031, "completions/min_length": 295.0, "entropy/max": 0.53515625, "entropy/mean": 0.447265625, "entropy/min": 0.357421875, "epoch": 0.259, "grad_norm": 1.4788345343195048, "kl": 0.326171875, "learning_rate": 1.7037329628703003e-06, "loss": 0.0032539048697799444, "memory(GiB)": 146.12, "reward": 1.6552636623382568, "reward_std": 0.24730446934700012, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41917556524276733, "rewards/EvidenceHallucination/std": 0.41460180282592773, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.538850724697113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5714285969734192, "rewards/VideoAccuracy/std": 0.5008702874183655, "step": 259, "train_speed(iter/s)": 0.018645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 506.0, "completions/mean_length": 352.5, "completions/min_length": 250.0, "entropy/max": 0.84765625, "entropy/mean": 0.482421875, "entropy/min": 0.3359375, "epoch": 0.26, "grad_norm": 1.5800491541258506, "kl": 0.3203125, "learning_rate": 1.7014748877063213e-06, "loss": 0.003226345870643854, "memory(GiB)": 146.12, "reward": 1.6665571928024292, "reward_std": 0.24422809481620789, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42166832089424133, "rewards/EvidenceHallucination/std": 0.4300963580608368, "rewards/Evidence_Num_Record/mean": 2.952380895614624, "rewards/Evidence_Num_Record/std": 0.6228330731391907, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5822235941886902, "rewards/VideoAccuracy/std": 0.46341386437416077, "step": 260, "train_speed(iter/s)": 0.01865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 680.0, "completions/mean_length": 440.23809814453125, "completions/min_length": 329.0, "entropy/max": 0.482421875, "entropy/mean": 0.35546875, "entropy/min": 0.1806640625, "epoch": 0.261, "grad_norm": 1.286282864683331, "kl": 0.234375, "learning_rate": 1.6992097486980106e-06, "loss": 0.0023602007422596216, "memory(GiB)": 146.12, "reward": 2.2474725246429443, "reward_std": 0.10352709889411926, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6212478280067444, "rewards/EvidenceHallucination/std": 0.37116003036499023, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.6228330731391907, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9047619104385376, "rewards/HonestTime/std": 0.297101765871048, "rewards/VideoAccuracy/mean": 0.9422704577445984, "rewards/VideoAccuracy/std": 0.35407426953315735, "step": 261, "train_speed(iter/s)": 0.018637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 806.0, "completions/mean_length": 386.452392578125, "completions/min_length": 230.0, "entropy/max": 0.97265625, "entropy/mean": 0.5078125, "entropy/min": 0.259765625, "epoch": 0.262, "grad_norm": 1.3120554102638122, "kl": 0.330078125, "learning_rate": 1.6969375686552936e-06, "loss": 0.003335272893309593, "memory(GiB)": 146.12, "reward": 1.4105677604675293, "reward_std": 0.2667071521282196, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.26712438464164734, "rewards/EvidenceHallucination/std": 0.37364739179611206, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.7696326971054077, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3571428656578064, "rewards/VideoAccuracy/std": 0.48496562242507935, "step": 262, "train_speed(iter/s)": 0.018627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 746.0, "completions/mean_length": 361.3333435058594, "completions/min_length": 248.0, "entropy/max": 0.5859375, "entropy/mean": 0.4296875, "entropy/min": 0.2080078125, "epoch": 0.263, "grad_norm": 1.3998149149303465, "kl": 0.333984375, "learning_rate": 1.6946583704589972e-06, "loss": 0.003358669113367796, "memory(GiB)": 146.12, "reward": 1.6507415771484375, "reward_std": 0.16618609428405762, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4359738528728485, "rewards/EvidenceHallucination/std": 0.40811988711357117, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.41739192605018616, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5635467767715454, "rewards/VideoAccuracy/std": 0.4699776768684387, "step": 263, "train_speed(iter/s)": 0.01863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 479.5, "completions/min_length": 310.0, "entropy/max": 1.1015625, "entropy/mean": 0.3828125, "entropy/min": 0.05859375, "epoch": 0.264, "grad_norm": 1.1863465610849044, "kl": 0.275390625, "learning_rate": 1.6923721770606226e-06, "loss": 0.002848361385986209, "memory(GiB)": 146.12, "reward": 2.009645462036133, "reward_std": 0.15725143253803253, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5580154061317444, "rewards/EvidenceHallucination/std": 0.388560950756073, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.4915074408054352, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.776613712310791, "rewards/VideoAccuracy/std": 0.5218595266342163, "step": 264, "train_speed(iter/s)": 0.01859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 704.0, "completions/mean_length": 439.0714416503906, "completions/min_length": 212.0, "entropy/max": 1.171875, "entropy/mean": 0.451171875, "entropy/min": 0.14453125, "epoch": 0.265, "grad_norm": 0.7861337355114407, "kl": 0.251953125, "learning_rate": 1.690079011482112e-06, "loss": 0.0025292334612458944, "memory(GiB)": 146.12, "reward": 1.294556736946106, "reward_std": 0.17236953973770142, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.043062519282102585, "rewards/EvidenceHallucination/std": 0.15979067981243134, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.8082759976387024, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.22403958439826965, "rewards/VideoAccuracy/std": 0.37679558992385864, "step": 265, "train_speed(iter/s)": 0.018598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.0, "completions/mean_length": 352.76190185546875, "completions/min_length": 279.0, "entropy/max": 0.53515625, "entropy/mean": 0.42578125, "entropy/min": 0.2578125, "epoch": 0.266, "grad_norm": 1.2774521106470518, "kl": 0.322265625, "learning_rate": 1.687778896815617e-06, "loss": 0.0032268771901726723, "memory(GiB)": 146.12, "reward": 1.636015772819519, "reward_std": 0.07334038615226746, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41423699259757996, "rewards/EvidenceHallucination/std": 0.4070185422897339, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.3541688024997711, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5531682372093201, "rewards/VideoAccuracy/std": 0.48006635904312134, "step": 266, "train_speed(iter/s)": 0.018604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.0, "completions/mean_length": 374.76190185546875, "completions/min_length": 237.0, "entropy/max": 0.96484375, "entropy/mean": 0.435546875, "entropy/min": 0.27734375, "epoch": 0.267, "grad_norm": 1.1790047383894677, "kl": 0.330078125, "learning_rate": 1.6854718562232666e-06, "loss": 0.0033270521089434624, "memory(GiB)": 146.12, "reward": 1.3987579345703125, "reward_std": 0.21581435203552246, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16781625151634216, "rewards/EvidenceHallucination/std": 0.3076663911342621, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.48973196744918823, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.2985280156135559, "rewards/VideoAccuracy/std": 0.49148377776145935, "step": 267, "train_speed(iter/s)": 0.018616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/mean_length": 423.0476379394531, "completions/min_length": 294.0, "entropy/max": 0.88671875, "entropy/mean": 0.3828125, "entropy/min": 0.1328125, "epoch": 0.268, "grad_norm": 1.2475442952274358, "kl": 0.2109375, "learning_rate": 1.6831579129369345e-06, "loss": 0.0021230760030448437, "memory(GiB)": 146.12, "reward": 1.9836636781692505, "reward_std": 0.3484979569911957, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4690280258655548, "rewards/EvidenceHallucination/std": 0.3501991033554077, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.525759220123291, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074110031128, "rewards/VideoAccuracy/mean": 0.7660484313964844, "rewards/VideoAccuracy/std": 0.45203521847724915, "step": 268, "train_speed(iter/s)": 0.018613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/mean_length": 350.952392578125, "completions/min_length": 217.0, "entropy/max": 0.75, "entropy/mean": 0.4453125, "entropy/min": 0.2734375, "epoch": 0.269, "grad_norm": 1.6652848945868766, "kl": 0.349609375, "learning_rate": 1.6808370902580034e-06, "loss": 0.0035270198713988066, "memory(GiB)": 146.12, "reward": 1.8315030336380005, "reward_std": 0.20914201438426971, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5860862135887146, "rewards/EvidenceHallucination/std": 0.4115218222141266, "rewards/Evidence_Num_Record/mean": 3.0714285373687744, "rewards/Evidence_Num_Record/std": 0.4629100263118744, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6809524297714233, "rewards/VideoAccuracy/std": 0.46551209688186646, "step": 269, "train_speed(iter/s)": 0.018597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 786.0, "completions/mean_length": 398.5714416503906, "completions/min_length": 260.0, "entropy/max": 0.7109375, "entropy/mean": 0.431640625, "entropy/min": 0.2216796875, "epoch": 0.27, "grad_norm": 1.407995494388005, "kl": 0.283203125, "learning_rate": 1.6785094115571322e-06, "loss": 0.0028950113337486982, "memory(GiB)": 146.12, "reward": 1.444591999053955, "reward_std": 0.22145162522792816, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3269577622413635, "rewards/EvidenceHallucination/std": 0.42625492811203003, "rewards/Evidence_Num_Record/mean": 3.047619104385376, "rewards/Evidence_Num_Record/std": 0.5388506650924683, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.37920039892196655, "rewards/VideoAccuracy/std": 0.42671406269073486, "step": 270, "train_speed(iter/s)": 0.018595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 767.0, "completions/mean_length": 418.73809814453125, "completions/min_length": 291.0, "entropy/max": 0.52734375, "entropy/mean": 0.345703125, "entropy/min": 0.1669921875, "epoch": 0.271, "grad_norm": 1.2815953898124415, "kl": 0.2236328125, "learning_rate": 1.6761749002740193e-06, "loss": 0.002252672566100955, "memory(GiB)": 146.12, "reward": 2.0878782272338867, "reward_std": 0.1658097207546234, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4907301962375641, "rewards/EvidenceHallucination/std": 0.38563522696495056, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.7054623961448669, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430335700511932, "rewards/VideoAccuracy/mean": 0.7944942116737366, "rewards/VideoAccuracy/std": 0.4951496422290802, "step": 271, "train_speed(iter/s)": 0.018605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 702.0, "completions/mean_length": 355.26190185546875, "completions/min_length": 232.0, "entropy/max": 0.6328125, "entropy/mean": 0.474609375, "entropy/min": 0.265625, "epoch": 0.272, "grad_norm": 1.4149181771197945, "kl": 0.322265625, "learning_rate": 1.6738335799171678e-06, "loss": 0.003463061060756445, "memory(GiB)": 146.12, "reward": 1.6014920473098755, "reward_std": 0.1528625339269638, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3407929837703705, "rewards/EvidenceHallucination/std": 0.3648604154586792, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.594203531742096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5, "rewards/VideoAccuracy/std": 0.5060608386993408, "step": 272, "train_speed(iter/s)": 0.018579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 432.0, "completions/mean_length": 317.0714416503906, "completions/min_length": 218.0, "entropy/max": 0.515625, "entropy/mean": 0.44140625, "entropy/min": 0.30078125, "epoch": 0.273, "grad_norm": 1.5898356743054494, "kl": 0.341796875, "learning_rate": 1.6714854740636476e-06, "loss": 0.0034236079081892967, "memory(GiB)": 146.12, "reward": 1.6726698875427246, "reward_std": 0.12765172123908997, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4024078845977783, "rewards/EvidenceHallucination/std": 0.38358187675476074, "rewards/Evidence_Num_Record/mean": 3.0, "rewards/Evidence_Num_Record/std": 0.38254600763320923, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5921885371208191, "rewards/VideoAccuracy/std": 0.4514201283454895, "step": 273, "train_speed(iter/s)": 0.018532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 568.0, "completions/mean_length": 372.6190490722656, "completions/min_length": 307.0, "entropy/max": 0.671875, "entropy/mean": 0.390625, "entropy/min": 0.232421875, "epoch": 0.274, "grad_norm": 1.3714403702806075, "kl": 0.294921875, "learning_rate": 1.669130606358858e-06, "loss": 0.002969046588987112, "memory(GiB)": 146.12, "reward": 1.9991328716278076, "reward_std": 0.25838133692741394, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4453437030315399, "rewards/EvidenceHallucination/std": 0.437023788690567, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.3541688024997711, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.776730477809906, "rewards/VideoAccuracy/std": 0.5350348353385925, "step": 274, "train_speed(iter/s)": 0.018539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 695.0, "completions/mean_length": 371.4285888671875, "completions/min_length": 214.0, "entropy/max": 0.890625, "entropy/mean": 0.4296875, "entropy/min": 0.1552734375, "epoch": 0.275, "grad_norm": 1.4398381008790575, "kl": 0.263671875, "learning_rate": 1.6667690005162916e-06, "loss": 0.002658488228917122, "memory(GiB)": 146.12, "reward": 1.5924758911132812, "reward_std": 0.27572792768478394, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3190845847129822, "rewards/EvidenceHallucination/std": 0.40540894865989685, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.8540400862693787, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.4381828308105469, "rewards/VideoAccuracy/std": 0.4048685133457184, "step": 275, "train_speed(iter/s)": 0.018546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 802.0, "completions/mean_length": 361.2857360839844, "completions/min_length": 232.0, "entropy/max": 0.68359375, "entropy/mean": 0.408203125, "entropy/min": 0.23046875, "epoch": 0.276, "grad_norm": 1.4123614760517333, "kl": 0.291015625, "learning_rate": 1.6644006803172922e-06, "loss": 0.0029547642916440964, "memory(GiB)": 146.12, "reward": 1.5313931703567505, "reward_std": 0.2745862901210785, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3110722601413727, "rewards/EvidenceHallucination/std": 0.3488800823688507, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.800696611404419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.46917879581451416, "rewards/VideoAccuracy/std": 0.49849677085876465, "step": 276, "train_speed(iter/s)": 0.018543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 550.0, "completions/mean_length": 369.8333435058594, "completions/min_length": 233.0, "entropy/max": 0.75, "entropy/mean": 0.443359375, "entropy/min": 0.2216796875, "epoch": 0.277, "grad_norm": 1.526393494077956, "kl": 0.310546875, "learning_rate": 1.6620256696108185e-06, "loss": 0.0031316380482167006, "memory(GiB)": 146.12, "reward": 1.6020976305007935, "reward_std": 0.18422286212444305, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33153560757637024, "rewards/EvidenceHallucination/std": 0.41367554664611816, "rewards/Evidence_Num_Record/mean": 2.9761905670166016, "rewards/Evidence_Num_Record/std": 0.5174089074134827, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4691237509250641, "rewards/VideoAccuracy/std": 0.5692328810691833, "step": 277, "train_speed(iter/s)": 0.018558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/mean_length": 370.69049072265625, "completions/min_length": 203.0, "entropy/max": 0.859375, "entropy/mean": 0.384765625, "entropy/min": 0.1611328125, "epoch": 0.278, "grad_norm": 1.404825204665656, "kl": 0.2236328125, "learning_rate": 1.6596439923132015e-06, "loss": 0.0022652121260762215, "memory(GiB)": 146.12, "reward": 2.0874264240264893, "reward_std": 0.14774879813194275, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4306017756462097, "rewards/EvidenceHallucination/std": 0.4004756212234497, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.5823577642440796, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8679726123809814, "rewards/VideoAccuracy/std": 0.38059890270233154, "step": 278, "train_speed(iter/s)": 0.018562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 660.0, "completions/mean_length": 355.952392578125, "completions/min_length": 273.0, "entropy/max": 1.3828125, "entropy/mean": 0.5078125, "entropy/min": 0.341796875, "epoch": 0.279, "grad_norm": 1.5862269334906465, "kl": 0.31640625, "learning_rate": 1.6572556724079054e-06, "loss": 0.003191668540239334, "memory(GiB)": 146.12, "reward": 1.796518087387085, "reward_std": 0.189345121383667, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5061548948287964, "rewards/EvidenceHallucination/std": 0.3945556581020355, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.8035924434661865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6619536876678467, "rewards/VideoAccuracy/std": 0.45757246017456055, "step": 279, "train_speed(iter/s)": 0.01855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 741.0, "completions/mean_length": 381.952392578125, "completions/min_length": 231.0, "entropy/max": 0.77734375, "entropy/mean": 0.474609375, "entropy/min": 0.27734375, "epoch": 0.28, "grad_norm": 1.3224319250136158, "kl": 0.296875, "learning_rate": 1.6548607339452852e-06, "loss": 0.003031244268640876, "memory(GiB)": 146.12, "reward": 1.2428820133209229, "reward_std": 0.2718929350376129, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1785127818584442, "rewards/EvidenceHallucination/std": 0.35546743869781494, "rewards/Evidence_Num_Record/mean": 3.047619104385376, "rewards/Evidence_Num_Record/std": 0.5388506650924683, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.20717935264110565, "rewards/VideoAccuracy/std": 0.38427430391311646, "step": 280, "train_speed(iter/s)": 0.018555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 634.0, "completions/mean_length": 415.9285888671875, "completions/min_length": 294.0, "entropy/max": 0.5625, "entropy/mean": 0.34765625, "entropy/min": 0.126953125, "epoch": 0.281, "grad_norm": 1.3244084971751433, "kl": 0.2138671875, "learning_rate": 1.6524592010423442e-06, "loss": 0.0021475343964993954, "memory(GiB)": 146.12, "reward": 2.2283387184143066, "reward_std": 0.22591161727905273, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5387340188026428, "rewards/EvidenceHallucination/std": 0.3905985355377197, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.5868279337882996, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9253537654876709, "rewards/VideoAccuracy/std": 0.3087855279445648, "step": 281, "train_speed(iter/s)": 0.018559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.0, "completions/mean_length": 349.4047546386719, "completions/min_length": 229.0, "entropy/max": 0.8515625, "entropy/mean": 0.53515625, "entropy/min": 0.291015625, "epoch": 0.282, "grad_norm": 1.2315899887328492, "kl": 0.326171875, "learning_rate": 1.6500510978824923e-06, "loss": 0.0032892085146158934, "memory(GiB)": 146.12, "reward": 1.4003338813781738, "reward_std": 0.10361535847187042, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2764706611633301, "rewards/EvidenceHallucination/std": 0.3708851933479309, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.6228330731391907, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3117063641548157, "rewards/VideoAccuracy/std": 0.42241451144218445, "step": 282, "train_speed(iter/s)": 0.018554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/mean_length": 381.66668701171875, "completions/min_length": 282.0, "entropy/max": 0.5625, "entropy/mean": 0.44921875, "entropy/min": 0.328125, "epoch": 0.283, "grad_norm": 1.0584350435835908, "kl": 0.302734375, "learning_rate": 1.6476364487153022e-06, "loss": 0.003029999090358615, "memory(GiB)": 146.12, "reward": 1.4891345500946045, "reward_std": 0.05869223177433014, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31638839840888977, "rewards/EvidenceHallucination/std": 0.3799745440483093, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.49679580330848694, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4258568286895752, "rewards/VideoAccuracy/std": 0.46307775378227234, "step": 283, "train_speed(iter/s)": 0.018545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/mean_length": 387.4761962890625, "completions/min_length": 293.0, "entropy/max": 0.4921875, "entropy/mean": 0.365234375, "entropy/min": 0.1416015625, "epoch": 0.284, "grad_norm": 1.2350425664714775, "kl": 0.298828125, "learning_rate": 1.6452152778562628e-06, "loss": 0.0029805107042193413, "memory(GiB)": 146.12, "reward": 1.9165128469467163, "reward_std": 0.19557994604110718, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5106355547904968, "rewards/EvidenceHallucination/std": 0.43393734097480774, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.5008703470230103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6810523271560669, "rewards/VideoAccuracy/std": 0.5946457386016846, "step": 284, "train_speed(iter/s)": 0.018557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 618.0, "completions/mean_length": 406.19049072265625, "completions/min_length": 255.0, "entropy/max": 1.5390625, "entropy/mean": 0.515625, "entropy/min": 0.1884765625, "epoch": 0.285, "grad_norm": 1.4615945800675014, "kl": 0.23828125, "learning_rate": 1.6427876096865393e-06, "loss": 0.002394037786871195, "memory(GiB)": 146.12, "reward": 2.0483367443084717, "reward_std": 0.26414060592651367, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6125563383102417, "rewards/EvidenceHallucination/std": 0.34441256523132324, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.7485952973365784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8258254528045654, "rewards/VideoAccuracy/std": 0.3920741677284241, "step": 285, "train_speed(iter/s)": 0.018574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/mean_length": 338.73809814453125, "completions/min_length": 213.0, "entropy/max": 0.515625, "entropy/mean": 0.419921875, "entropy/min": 0.298828125, "epoch": 0.286, "grad_norm": 0.8613073507067102, "kl": 0.302734375, "learning_rate": 1.6403534686527223e-06, "loss": 0.003025809768587351, "memory(GiB)": 146.12, "reward": 1.3098571300506592, "reward_std": 0.0890624076128006, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18180575966835022, "rewards/EvidenceHallucination/std": 0.32224151492118835, "rewards/Evidence_Num_Record/mean": 3.0714285373687744, "rewards/Evidence_Num_Record/std": 0.4629100561141968, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2734958827495575, "rewards/VideoAccuracy/std": 0.43439802527427673, "step": 286, "train_speed(iter/s)": 0.018531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.0, "completions/mean_length": 377.1428527832031, "completions/min_length": 270.0, "entropy/max": 0.5859375, "entropy/mean": 0.42578125, "entropy/min": 0.255859375, "epoch": 0.287, "grad_norm": 1.6139082440394137, "kl": 0.322265625, "learning_rate": 1.6379128792665852e-06, "loss": 0.0032466964330524206, "memory(GiB)": 146.12, "reward": 1.5950889587402344, "reward_std": 0.3116930425167084, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3740018606185913, "rewards/EvidenceHallucination/std": 0.439867228269577, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.532345175743103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.45362186431884766, "rewards/VideoAccuracy/std": 0.4430921971797943, "step": 287, "train_speed(iter/s)": 0.018537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 579.0, "completions/mean_length": 421.4047546386719, "completions/min_length": 306.0, "entropy/max": 0.76953125, "entropy/mean": 0.34765625, "entropy/min": 0.09375, "epoch": 0.288, "grad_norm": 1.1985359286429647, "kl": 0.2119140625, "learning_rate": 1.6354658661048361e-06, "loss": 0.0021414700895547867, "memory(GiB)": 146.12, "reward": 2.0947811603546143, "reward_std": 0.245386004447937, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5193066596984863, "rewards/EvidenceHallucination/std": 0.3565949499607086, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.5037605166435242, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8575864434242249, "rewards/VideoAccuracy/std": 0.4079618752002716, "step": 288, "train_speed(iter/s)": 0.018533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.0, "completions/mean_length": 367.0952453613281, "completions/min_length": 283.0, "entropy/max": 0.609375, "entropy/mean": 0.443359375, "entropy/min": 0.2734375, "epoch": 0.289, "grad_norm": 1.4710639856031327, "kl": 0.298828125, "learning_rate": 1.6330124538088703e-06, "loss": 0.0030165519565343857, "memory(GiB)": 146.12, "reward": 1.7694091796875, "reward_std": 0.21903131902217865, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.47865933179855347, "rewards/EvidenceHallucination/std": 0.3591003715991974, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.5500501394271851, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6403442025184631, "rewards/VideoAccuracy/std": 0.44497716426849365, "step": 289, "train_speed(iter/s)": 0.018538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 610.0, "completions/mean_length": 398.2857360839844, "completions/min_length": 281.0, "entropy/max": 0.5859375, "entropy/mean": 0.435546875, "entropy/min": 0.2578125, "epoch": 0.29, "grad_norm": 1.51714136619265, "kl": 0.287109375, "learning_rate": 1.6305526670845225e-06, "loss": 0.0028732414357364178, "memory(GiB)": 146.12, "reward": 1.5587888956069946, "reward_std": 0.276694655418396, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46286067366600037, "rewards/EvidenceHallucination/std": 0.4573749303817749, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.4843665361404419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.46621668338775635, "rewards/VideoAccuracy/std": 0.4573647677898407, "step": 290, "train_speed(iter/s)": 0.018533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 544.0, "completions/mean_length": 420.21429443359375, "completions/min_length": 290.0, "entropy/max": 0.4921875, "entropy/mean": 0.29296875, "entropy/min": 0.1123046875, "epoch": 0.291, "grad_norm": 1.3266734858103288, "kl": 0.2099609375, "learning_rate": 1.6280865307018174e-06, "loss": 0.0020992772188037634, "memory(GiB)": 146.12, "reward": 2.392334461212158, "reward_std": 0.15745367109775543, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7055816054344177, "rewards/EvidenceHallucination/std": 0.21519999206066132, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.49150747060775757, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0512181520462036, "rewards/VideoAccuracy/std": 0.19238141179084778, "step": 291, "train_speed(iter/s)": 0.018534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/mean_length": 342.5, "completions/min_length": 239.0, "entropy/max": 1.640625, "entropy/mean": 0.55859375, "entropy/min": 0.3046875, "epoch": 0.292, "grad_norm": 1.497831242594547, "kl": 0.294921875, "learning_rate": 1.6256140694947215e-06, "loss": 0.002976033603772521, "memory(GiB)": 146.12, "reward": 1.3734747171401978, "reward_std": 0.29520106315612793, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19412264227867126, "rewards/EvidenceHallucination/std": 0.33884894847869873, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.7543909549713135, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.30607864260673523, "rewards/VideoAccuracy/std": 0.4278899133205414, "step": 292, "train_speed(iter/s)": 0.018532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 484.0, "completions/mean_length": 350.3809509277344, "completions/min_length": 243.0, "entropy/max": 0.5546875, "entropy/mean": 0.419921875, "entropy/min": 0.326171875, "epoch": 0.293, "grad_norm": 1.0298131574192986, "kl": 0.298828125, "learning_rate": 1.623135308360891e-06, "loss": 0.0029970314353704453, "memory(GiB)": 146.12, "reward": 1.3211218118667603, "reward_std": 0.23074208199977875, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21663756668567657, "rewards/EvidenceHallucination/std": 0.36890122294425964, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.4371005594730377, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.2777942419052124, "rewards/VideoAccuracy/std": 0.39285850524902344, "step": 293, "train_speed(iter/s)": 0.018541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 474.0, "completions/mean_length": 365.6190490722656, "completions/min_length": 305.0, "entropy/max": 0.5546875, "entropy/mean": 0.390625, "entropy/min": 0.1396484375, "epoch": 0.294, "grad_norm": 1.4985764630374498, "kl": 0.271484375, "learning_rate": 1.6206502722614236e-06, "loss": 0.002712035086005926, "memory(GiB)": 146.12, "reward": 2.1152074337005615, "reward_std": 0.26597416400909424, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5570864081382751, "rewards/EvidenceHallucination/std": 0.39343953132629395, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.39743661880493164, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.8752186298370361, "rewards/VideoAccuracy/std": 0.43338462710380554, "step": 294, "train_speed(iter/s)": 0.01855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 480.0, "completions/mean_length": 361.16668701171875, "completions/min_length": 221.0, "entropy/max": 1.09375, "entropy/mean": 0.45703125, "entropy/min": 0.11767578125, "epoch": 0.295, "grad_norm": 1.525870474835172, "kl": 0.2333984375, "learning_rate": 1.6181589862206052e-06, "loss": 0.0023380776401609182, "memory(GiB)": 146.12, "reward": 2.0736372470855713, "reward_std": 0.33692988753318787, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6441951990127563, "rewards/EvidenceHallucination/std": 0.32928863167762756, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.7635724544525146, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8447983264923096, "rewards/VideoAccuracy/std": 0.40280503034591675, "step": 295, "train_speed(iter/s)": 0.018572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 541.0, "completions/mean_length": 349.28570556640625, "completions/min_length": 251.0, "entropy/max": 0.51953125, "entropy/mean": 0.392578125, "entropy/min": 0.224609375, "epoch": 0.296, "grad_norm": 1.6048282162774623, "kl": 0.328125, "learning_rate": 1.615661475325658e-06, "loss": 0.00313993776217103, "memory(GiB)": 146.12, "reward": 1.466852068901062, "reward_std": 0.3431503474712372, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3205814063549042, "rewards/EvidenceHallucination/std": 0.4040875732898712, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.48496562242507935, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.40273576974868774, "rewards/VideoAccuracy/std": 0.4944770336151123, "step": 296, "train_speed(iter/s)": 0.018534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/mean_length": 343.8809509277344, "completions/min_length": 227.0, "entropy/max": 0.51171875, "entropy/mean": 0.396484375, "entropy/min": 0.26171875, "epoch": 0.297, "grad_norm": 1.5646386644793522, "kl": 0.294921875, "learning_rate": 1.61315776472649e-06, "loss": 0.0029657031409442425, "memory(GiB)": 146.12, "reward": 1.6441454887390137, "reward_std": 0.24957570433616638, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.337864488363266, "rewards/EvidenceHallucination/std": 0.39413684606552124, "rewards/Evidence_Num_Record/mean": 2.9761905670166016, "rewards/Evidence_Num_Record/std": 0.46790117025375366, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5099059343338013, "rewards/VideoAccuracy/std": 0.5179862380027771, "step": 297, "train_speed(iter/s)": 0.018496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 659.0, "completions/mean_length": 439.21429443359375, "completions/min_length": 325.0, "entropy/max": 1.2265625, "entropy/mean": 0.3828125, "entropy/min": 0.1337890625, "epoch": 0.298, "grad_norm": 1.2283899643427931, "kl": 0.19140625, "learning_rate": 1.6106478796354382e-06, "loss": 0.0019259240943938494, "memory(GiB)": 146.12, "reward": 1.9087477922439575, "reward_std": 0.12891468405723572, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5294734239578247, "rewards/EvidenceHallucination/std": 0.3857249617576599, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.8249872326850891, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6695197224617004, "rewards/VideoAccuracy/std": 0.4106285572052002, "step": 298, "train_speed(iter/s)": 0.018501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/mean_length": 359.26190185546875, "completions/min_length": 199.0, "entropy/max": 0.71484375, "entropy/mean": 0.4296875, "entropy/min": 0.283203125, "epoch": 0.299, "grad_norm": 1.4108113774763722, "kl": 0.263671875, "learning_rate": 1.608131845327018e-06, "loss": 0.0026504751294851303, "memory(GiB)": 146.12, "reward": 1.3847062587738037, "reward_std": 0.21600672602653503, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2568646967411041, "rewards/EvidenceHallucination/std": 0.4156530499458313, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 1.173903226852417, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4500652849674225, "step": 299, "train_speed(iter/s)": 0.018494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 356.23809814453125, "completions/min_length": 259.0, "entropy/max": 0.55078125, "entropy/mean": 0.404296875, "entropy/min": 0.27734375, "epoch": 0.3, "grad_norm": 1.4286158463874457, "kl": 0.251953125, "learning_rate": 1.6056096871376666e-06, "loss": 0.0025103879161179066, "memory(GiB)": 146.12, "reward": 1.280792236328125, "reward_std": 0.29425978660583496, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1964530497789383, "rewards/EvidenceHallucination/std": 0.3479928970336914, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 1.060591697692871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.24150165915489197, "rewards/VideoAccuracy/std": 0.33755213022232056, "step": 300, "train_speed(iter/s)": 0.018496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 559.0, "completions/mean_length": 384.6190490722656, "completions/min_length": 190.0, "entropy/max": 0.65625, "entropy/mean": 0.3203125, "entropy/min": 0.1259765625, "epoch": 0.301, "grad_norm": 1.3581633235784614, "kl": 0.2041015625, "learning_rate": 1.6030814304654894e-06, "loss": 0.0020680841989815235, "memory(GiB)": 146.12, "reward": 2.094529390335083, "reward_std": 0.1851232498884201, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46943730115890503, "rewards/EvidenceHallucination/std": 0.37510302662849426, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.6325473189353943, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8006417751312256, "rewards/VideoAccuracy/std": 0.34724944829940796, "step": 301, "train_speed(iter/s)": 0.01845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 324.5, "completions/min_length": 212.0, "entropy/max": 1.8515625, "entropy/mean": 0.55078125, "entropy/min": 0.296875, "epoch": 0.302, "grad_norm": 1.6106867892162855, "kl": 0.287109375, "learning_rate": 1.600547100770003e-06, "loss": 0.0028827935457229614, "memory(GiB)": 146.12, "reward": 1.6848338842391968, "reward_std": 0.23833847045898438, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3699362576007843, "rewards/EvidenceHallucination/std": 0.36776798963546753, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.6357524991035461, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5775132775306702, "rewards/VideoAccuracy/std": 0.4651445746421814, "step": 302, "train_speed(iter/s)": 0.018461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/mean_length": 378.3571472167969, "completions/min_length": 257.0, "entropy/max": 0.55859375, "entropy/mean": 0.365234375, "entropy/min": 0.203125, "epoch": 0.303, "grad_norm": 1.1537914865743932, "kl": 0.2421875, "learning_rate": 1.598006723571879e-06, "loss": 0.0024713780730962753, "memory(GiB)": 146.12, "reward": 1.2205116748809814, "reward_std": 0.29441866278648376, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18889151513576508, "rewards/EvidenceHallucination/std": 0.3691125810146332, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 1.8759434223175049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.18273337185382843, "rewards/VideoAccuracy/std": 0.349786639213562, "step": 303, "train_speed(iter/s)": 0.018444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/mean_length": 385.452392578125, "completions/min_length": 317.0, "entropy/max": 0.52734375, "entropy/mean": 0.384765625, "entropy/min": 0.13671875, "epoch": 0.304, "grad_norm": 1.310105879510366, "kl": 0.234375, "learning_rate": 1.595460324452688e-06, "loss": 0.002331523923203349, "memory(GiB)": 146.12, "reward": 1.746359944343567, "reward_std": 0.22891227900981903, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2776835560798645, "rewards/EvidenceHallucination/std": 0.3692076802253723, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.6325473189353943, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.562251627445221, "rewards/VideoAccuracy/std": 0.5393766164779663, "step": 304, "train_speed(iter/s)": 0.018453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 693.0, "completions/mean_length": 394.5952453613281, "completions/min_length": 187.0, "entropy/max": 1.3359375, "entropy/mean": 0.546875, "entropy/min": 0.1357421875, "epoch": 0.305, "grad_norm": 1.4417766793150282, "kl": 0.2099609375, "learning_rate": 1.5929079290546405e-06, "loss": 0.0021169825922697783, "memory(GiB)": 146.12, "reward": 1.793710470199585, "reward_std": 0.28301554918289185, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30675727128982544, "rewards/EvidenceHallucination/std": 0.36037933826446533, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 1.267845869064331, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.632358968257904, "rewards/VideoAccuracy/std": 0.50540691614151, "step": 305, "train_speed(iter/s)": 0.018468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 491.0, "completions/mean_length": 339.69049072265625, "completions/min_length": 240.0, "entropy/max": 0.53125, "entropy/mean": 0.421875, "entropy/min": 0.2890625, "epoch": 0.306, "grad_norm": 1.0434674554954733, "kl": 0.25, "learning_rate": 1.5903495630803298e-06, "loss": 0.002527870936319232, "memory(GiB)": 146.12, "reward": 1.4312735795974731, "reward_std": 0.0730072408914566, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31148022413253784, "rewards/EvidenceHallucination/std": 0.4281456172466278, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.7071067094802856, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.36897751688957214, "rewards/VideoAccuracy/std": 0.47729581594467163, "step": 306, "train_speed(iter/s)": 0.018442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 521.0, "completions/mean_length": 374.69049072265625, "completions/min_length": 282.0, "entropy/max": 0.5703125, "entropy/mean": 0.38671875, "entropy/min": 0.291015625, "epoch": 0.307, "grad_norm": 1.3614605604782124, "kl": 0.26953125, "learning_rate": 1.587785252292473e-06, "loss": 0.0027160055469721556, "memory(GiB)": 146.12, "reward": 1.530571699142456, "reward_std": 0.07783089578151703, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.25006505846977234, "rewards/EvidenceHallucination/std": 0.37279269099235535, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.5436787009239197, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.41865381598472595, "rewards/VideoAccuracy/std": 0.43165814876556396, "step": 307, "train_speed(iter/s)": 0.018454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 822.0, "completions/mean_length": 439.71429443359375, "completions/min_length": 288.0, "entropy/max": 1.8828125, "entropy/mean": 0.484375, "entropy/min": 0.125, "epoch": 0.308, "grad_norm": 1.2920309784432153, "kl": 0.17578125, "learning_rate": 1.5852150225136515e-06, "loss": 0.001778631005436182, "memory(GiB)": 146.12, "reward": 2.06137752532959, "reward_std": 0.15827319025993347, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44515714049339294, "rewards/EvidenceHallucination/std": 0.3905835449695587, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.6917465925216675, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8390125036239624, "rewards/VideoAccuracy/std": 0.3652336299419403, "step": 308, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 512.0, "completions/mean_length": 363.5, "completions/min_length": 281.0, "entropy/max": 0.57421875, "entropy/mean": 0.419921875, "entropy/min": 0.310546875, "epoch": 0.309, "grad_norm": 1.0898502143495572, "kl": 0.251953125, "learning_rate": 1.5826388996260502e-06, "loss": 0.0025412074755877256, "memory(GiB)": 146.12, "reward": 1.4703563451766968, "reward_std": 0.1100665032863617, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3279728889465332, "rewards/EvidenceHallucination/std": 0.4086807668209076, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.8025076985359192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4047619104385376, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 309, "train_speed(iter/s)": 0.018445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/mean_length": 352.66668701171875, "completions/min_length": 201.0, "entropy/max": 0.53515625, "entropy/mean": 0.41796875, "entropy/min": 0.2265625, "epoch": 0.31, "grad_norm": 1.3505541919971262, "kl": 0.2490234375, "learning_rate": 1.5800569095711981e-06, "loss": 0.002500642091035843, "memory(GiB)": 146.12, "reward": 1.4585460424423218, "reward_std": 0.22531914710998535, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3418963849544525, "rewards/EvidenceHallucination/std": 0.4326116442680359, "rewards/Evidence_Num_Record/mean": 3.0714285373687744, "rewards/Evidence_Num_Record/std": 0.7454858422279358, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.390166699886322, "rewards/VideoAccuracy/std": 0.4661102294921875, "step": 310, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.0, "completions/mean_length": 395.952392578125, "completions/min_length": 276.0, "entropy/max": 0.50390625, "entropy/mean": 0.279296875, "entropy/min": 0.125, "epoch": 0.311, "grad_norm": 1.241576956768926, "kl": 0.1962890625, "learning_rate": 1.5774690783497064e-06, "loss": 0.001979774795472622, "memory(GiB)": 146.12, "reward": 2.5162124633789062, "reward_std": 0.160222128033638, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5431568622589111, "rewards/EvidenceHallucination/std": 0.4012848436832428, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.506060779094696, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.2075811624526978, "rewards/VideoAccuracy/std": 0.202640101313591, "step": 311, "train_speed(iter/s)": 0.018449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 501.0, "completions/mean_length": 381.76190185546875, "completions/min_length": 285.0, "entropy/max": 0.94921875, "entropy/mean": 0.4609375, "entropy/min": 0.251953125, "epoch": 0.312, "grad_norm": 1.5698624921416586, "kl": 0.251953125, "learning_rate": 1.5748754320210072e-06, "loss": 0.0025334805250167847, "memory(GiB)": 146.12, "reward": 1.825404405593872, "reward_std": 0.20616315305233002, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5555937886238098, "rewards/EvidenceHallucination/std": 0.3749423921108246, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.6270148158073425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7142857313156128, "rewards/VideoAccuracy/std": 0.45722994208335876, "step": 312, "train_speed(iter/s)": 0.018452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/mean_length": 391.19049072265625, "completions/min_length": 276.0, "entropy/max": 0.57421875, "entropy/mean": 0.421875, "entropy/min": 0.28515625, "epoch": 0.313, "grad_norm": 1.5479755955950403, "kl": 0.2353515625, "learning_rate": 1.5722759967030896e-06, "loss": 0.0023774546571075916, "memory(GiB)": 146.12, "reward": 1.5936198234558105, "reward_std": 0.254268616437912, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.38316085934638977, "rewards/EvidenceHallucination/std": 0.4003508985042572, "rewards/Evidence_Num_Record/mean": 3.238095283508301, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5169876217842102, "rewards/VideoAccuracy/std": 0.4889325499534607, "step": 313, "train_speed(iter/s)": 0.018445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 445.0, "completions/mean_length": 377.5238037109375, "completions/min_length": 261.0, "entropy/max": 0.60546875, "entropy/mean": 0.3984375, "entropy/min": 0.1474609375, "epoch": 0.314, "grad_norm": 1.5740046501554912, "kl": 0.251953125, "learning_rate": 1.5696707985722389e-06, "loss": 0.0025313228834420443, "memory(GiB)": 146.12, "reward": 1.8846992254257202, "reward_std": 0.1694190353155136, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4761403501033783, "rewards/EvidenceHallucination/std": 0.4628937542438507, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.5328903794288635, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.6656614542007446, "rewards/VideoAccuracy/std": 0.5901932120323181, "step": 314, "train_speed(iter/s)": 0.01845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/mean_length": 418.6190490722656, "completions/min_length": 309.0, "entropy/max": 1.03125, "entropy/mean": 0.48828125, "entropy/min": 0.146484375, "epoch": 0.315, "grad_norm": 1.318430944373399, "kl": 0.2294921875, "learning_rate": 1.5670598638627706e-06, "loss": 0.002312577562406659, "memory(GiB)": 146.12, "reward": 1.614781379699707, "reward_std": 0.29396653175354004, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2715763449668884, "rewards/EvidenceHallucination/std": 0.35540372133255005, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9997096061706543, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4937993586063385, "rewards/VideoAccuracy/std": 0.5385206937789917, "step": 315, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 558.0, "completions/mean_length": 381.9047546386719, "completions/min_length": 213.0, "entropy/max": 0.5625, "entropy/mean": 0.42578125, "entropy/min": 0.27734375, "epoch": 0.316, "grad_norm": 1.3986195379080097, "kl": 0.25390625, "learning_rate": 1.5644432188667693e-06, "loss": 0.0025634332560002804, "memory(GiB)": 146.12, "reward": 1.4754207134246826, "reward_std": 0.17962013185024261, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29231202602386475, "rewards/EvidenceHallucination/std": 0.39029237627983093, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.7265497446060181, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.38838687539100647, "rewards/VideoAccuracy/std": 0.45132169127464294, "step": 316, "train_speed(iter/s)": 0.018446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 608.0, "completions/mean_length": 402.3809509277344, "completions/min_length": 319.0, "entropy/max": 0.56640625, "entropy/mean": 0.443359375, "entropy/min": 0.322265625, "epoch": 0.317, "grad_norm": 1.6504393361258312, "kl": 0.259765625, "learning_rate": 1.56182088993382e-06, "loss": 0.0026385614182800055, "memory(GiB)": 146.12, "reward": 1.6107887029647827, "reward_std": 0.23265895247459412, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.3710850477218628, "rewards/EvidenceHallucination/std": 0.4066302478313446, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.7419721484184265, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.49133360385894775, "rewards/VideoAccuracy/std": 0.48049360513687134, "step": 317, "train_speed(iter/s)": 0.018443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 636.0, "completions/mean_length": 454.71429443359375, "completions/min_length": 324.0, "entropy/max": 0.59375, "entropy/mean": 0.33203125, "entropy/min": 0.11865234375, "epoch": 0.318, "grad_norm": 1.1005909927617632, "kl": 0.2001953125, "learning_rate": 1.5591929034707466e-06, "loss": 0.002016433048993349, "memory(GiB)": 146.12, "reward": 1.6688334941864014, "reward_std": 0.20153991878032684, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.08643382787704468, "rewards/EvidenceHallucination/std": 0.23969198763370514, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.4703768193721771, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5182134509086609, "rewards/VideoAccuracy/std": 0.5244945287704468, "step": 318, "train_speed(iter/s)": 0.018446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 640.0, "completions/mean_length": 397.0, "completions/min_length": 283.0, "entropy/max": 1.34375, "entropy/mean": 0.494140625, "entropy/min": 0.376953125, "epoch": 0.319, "grad_norm": 1.4631850543181406, "kl": 0.2470703125, "learning_rate": 1.556559285941344e-06, "loss": 0.002471283543854952, "memory(GiB)": 146.12, "reward": 1.7699050903320312, "reward_std": 0.15336094796657562, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5161923766136169, "rewards/EvidenceHallucination/std": 0.39299091696739197, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 1.0801234245300293, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 319, "train_speed(iter/s)": 0.018451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/mean_length": 389.26190185546875, "completions/min_length": 249.0, "entropy/max": 0.578125, "entropy/mean": 0.43359375, "entropy/min": 0.32421875, "epoch": 0.32, "grad_norm": 1.4761331770574069, "kl": 0.255859375, "learning_rate": 1.5539200638661104e-06, "loss": 0.0025728538166731596, "memory(GiB)": 146.12, "reward": 1.4469672441482544, "reward_std": 0.2820085883140564, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2830442786216736, "rewards/EvidenceHallucination/std": 0.39048337936401367, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.551631510257721, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3570248782634735, "rewards/VideoAccuracy/std": 0.3700158894062042, "step": 320, "train_speed(iter/s)": 0.018457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 532.0, "completions/mean_length": 413.8095397949219, "completions/min_length": 327.0, "entropy/max": 0.66796875, "entropy/mean": 0.306640625, "entropy/min": 0.1376953125, "epoch": 0.321, "grad_norm": 1.236470466255527, "kl": 0.21484375, "learning_rate": 1.5512752638219832e-06, "loss": 0.0021507274359464645, "memory(GiB)": 146.12, "reward": 2.2372350692749023, "reward_std": 0.14746171236038208, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.38625553250312805, "rewards/EvidenceHallucination/std": 0.3984059989452362, "rewards/Evidence_Num_Record/mean": 3.2142858505249023, "rewards/Evidence_Num_Record/std": 0.7168942093849182, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9766505360603333, "rewards/VideoAccuracy/std": 0.4403650760650635, "step": 321, "train_speed(iter/s)": 0.01846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 638.0, "completions/mean_length": 439.0, "completions/min_length": 321.0, "entropy/max": 1.234375, "entropy/mean": 0.5234375, "entropy/min": 0.298828125, "epoch": 0.322, "grad_norm": 1.2952347326863511, "kl": 0.2578125, "learning_rate": 1.5486249124420701e-06, "loss": 0.0025827204808592796, "memory(GiB)": 146.12, "reward": 1.217543125152588, "reward_std": 0.3603568971157074, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13533399999141693, "rewards/EvidenceHallucination/std": 0.29350516200065613, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 1.007521152496338, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.190476194024086, "rewards/VideoAccuracy/std": 0.39743661880493164, "step": 322, "train_speed(iter/s)": 0.018454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 609.0, "completions/mean_length": 429.9285888671875, "completions/min_length": 305.0, "entropy/max": 0.63671875, "entropy/mean": 0.5, "entropy/min": 0.298828125, "epoch": 0.323, "grad_norm": 1.3586636355187904, "kl": 0.23046875, "learning_rate": 1.545969036415379e-06, "loss": 0.002311261370778084, "memory(GiB)": 146.12, "reward": 1.468063235282898, "reward_std": 0.26232224702835083, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30530834197998047, "rewards/EvidenceHallucination/std": 0.4035953879356384, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.7344991564750671, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.095238097012043, "rewards/HonestTime/std": 0.297101765871048, "rewards/VideoAccuracy/mean": 0.3879539370536804, "rewards/VideoAccuracy/std": 0.4327782094478607, "step": 323, "train_speed(iter/s)": 0.01845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/mean_length": 423.4761962890625, "completions/min_length": 263.0, "entropy/max": 0.5546875, "entropy/mean": 0.41015625, "entropy/min": 0.12890625, "epoch": 0.324, "grad_norm": 1.3775070305346473, "kl": 0.2490234375, "learning_rate": 1.543307662486553e-06, "loss": 0.0024996590800583363, "memory(GiB)": 146.12, "reward": 1.8816767930984497, "reward_std": 0.282106876373291, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41124141216278076, "rewards/EvidenceHallucination/std": 0.4110432267189026, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.547404408454895, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6660952568054199, "rewards/VideoAccuracy/std": 0.5266658663749695, "step": 324, "train_speed(iter/s)": 0.018451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 696.0, "completions/mean_length": 466.1190490722656, "completions/min_length": 324.0, "entropy/max": 0.984375, "entropy/mean": 0.48046875, "entropy/min": 0.1572265625, "epoch": 0.325, "grad_norm": 1.3292958129358652, "kl": 0.2138671875, "learning_rate": 1.5406408174555977e-06, "loss": 0.002168423030525446, "memory(GiB)": 146.12, "reward": 1.9544423818588257, "reward_std": 0.2796865999698639, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37937915325164795, "rewards/EvidenceHallucination/std": 0.33197489380836487, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.845841109752655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8118998408317566, "rewards/VideoAccuracy/std": 0.4680696427822113, "step": 325, "train_speed(iter/s)": 0.018427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 523.0, "completions/mean_length": 409.0, "completions/min_length": 312.0, "entropy/max": 0.57421875, "entropy/mean": 0.44921875, "entropy/min": 0.310546875, "epoch": 0.326, "grad_norm": 1.3407404990910743, "kl": 0.263671875, "learning_rate": 1.5379685281776125e-06, "loss": 0.00263267382979393, "memory(GiB)": 146.12, "reward": 1.5945847034454346, "reward_std": 0.12171037495136261, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39156463742256165, "rewards/EvidenceHallucination/std": 0.4287061393260956, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.6228330731391907, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.48293837904930115, "rewards/VideoAccuracy/std": 0.43847227096557617, "step": 326, "train_speed(iter/s)": 0.018432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.0, "completions/mean_length": 421.952392578125, "completions/min_length": 269.0, "entropy/max": 0.57421875, "entropy/mean": 0.439453125, "entropy/min": 0.30078125, "epoch": 0.327, "grad_norm": 1.2325435626664694, "kl": 0.27734375, "learning_rate": 1.5352908215625213e-06, "loss": 0.002784645650535822, "memory(GiB)": 146.12, "reward": 1.6382532119750977, "reward_std": 0.18829719722270966, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3113822638988495, "rewards/EvidenceHallucination/std": 0.36439794301986694, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.5961549282073975, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5093101263046265, "rewards/VideoAccuracy/std": 0.5441337823867798, "step": 327, "train_speed(iter/s)": 0.018436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 848.0, "completions/mean_length": 493.0714416503906, "completions/min_length": 334.0, "entropy/max": 0.61328125, "entropy/mean": 0.37109375, "entropy/min": 0.1357421875, "epoch": 0.328, "grad_norm": 1.026689315116029, "kl": 0.20703125, "learning_rate": 1.5326077245747997e-06, "loss": 0.002078625839203596, "memory(GiB)": 146.12, "reward": 1.901085615158081, "reward_std": 0.16592055559158325, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2927778959274292, "rewards/EvidenceHallucination/std": 0.3463214337825775, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.8821365237236023, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7211014628410339, "rewards/VideoAccuracy/std": 0.5334154367446899, "step": 328, "train_speed(iter/s)": 0.01843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 557.0, "completions/mean_length": 422.73809814453125, "completions/min_length": 288.0, "entropy/max": 1.1640625, "entropy/mean": 0.498046875, "entropy/min": 0.345703125, "epoch": 0.329, "grad_norm": 1.475495745818706, "kl": 0.24609375, "learning_rate": 1.5299192642332049e-06, "loss": 0.0024703675881028175, "memory(GiB)": 146.12, "reward": 1.6978988647460938, "reward_std": 0.22649529576301575, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.4537801444530487, "rewards/EvidenceHallucination/std": 0.3907560408115387, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.7948731780052185, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6190476417541504, "rewards/VideoAccuracy/std": 0.4915074408054352, "step": 329, "train_speed(iter/s)": 0.018436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 594.0, "completions/mean_length": 412.26190185546875, "completions/min_length": 225.0, "entropy/max": 0.640625, "entropy/mean": 0.4609375, "entropy/min": 0.2890625, "epoch": 0.33, "grad_norm": 1.4157097744857878, "kl": 0.25, "learning_rate": 1.5272254676105023e-06, "loss": 0.002520774258300662, "memory(GiB)": 146.12, "reward": 1.5191279649734497, "reward_std": 0.30416056513786316, "rewards/EvidenceFormat/mean": 0.9523809552192688, "rewards/EvidenceFormat/std": 0.21554027497768402, "rewards/EvidenceHallucination/mean": 0.3154602646827698, "rewards/EvidenceHallucination/std": 0.36483141779899597, "rewards/Evidence_Num_Record/mean": 3.1190476417541504, "rewards/Evidence_Num_Record/std": 0.916046142578125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.4560360312461853, "rewards/VideoAccuracy/std": 0.4275130331516266, "step": 330, "train_speed(iter/s)": 0.018441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/mean_length": 403.6190490722656, "completions/min_length": 310.0, "entropy/max": 0.49609375, "entropy/mean": 0.34375, "entropy/min": 0.1708984375, "epoch": 0.331, "grad_norm": 1.346354815792557, "kl": 0.234375, "learning_rate": 1.5245263618331943e-06, "loss": 0.0023695288691669703, "memory(GiB)": 146.12, "reward": 2.5452191829681396, "reward_std": 0.1293453872203827, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7590943574905396, "rewards/EvidenceHallucination/std": 0.3576880395412445, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.37719547748565674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.193400263786316, "rewards/VideoAccuracy/std": 0.15034785866737366, "step": 331, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 617.0, "completions/mean_length": 443.3095397949219, "completions/min_length": 307.0, "entropy/max": 1.1796875, "entropy/mean": 0.60546875, "entropy/min": 0.34375, "epoch": 0.332, "grad_norm": 8.14056828763085, "kl": 0.279296875, "learning_rate": 1.521821974081246e-06, "loss": 0.0028224079869687557, "memory(GiB)": 146.12, "reward": 1.7714673280715942, "reward_std": 0.33833831548690796, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5240030288696289, "rewards/EvidenceHallucination/std": 0.4096086621284485, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.7501451373100281, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 332, "train_speed(iter/s)": 0.018456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 626.0, "completions/mean_length": 430.5476379394531, "completions/min_length": 322.0, "entropy/max": 0.59375, "entropy/mean": 0.458984375, "entropy/min": 0.33984375, "epoch": 0.333, "grad_norm": 1.2195400931695357, "kl": 0.25390625, "learning_rate": 1.519112331587812e-06, "loss": 0.002552357502281666, "memory(GiB)": 146.12, "reward": 1.5343940258026123, "reward_std": 0.27544981241226196, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2723103165626526, "rewards/EvidenceHallucination/std": 0.3592282831668854, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.5702658891677856, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.44659873843193054, "rewards/VideoAccuracy/std": 0.47520163655281067, "step": 333, "train_speed(iter/s)": 0.018431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 507.0, "completions/mean_length": 396.76190185546875, "completions/min_length": 326.0, "entropy/max": 0.69140625, "entropy/mean": 0.416015625, "entropy/min": 0.1455078125, "epoch": 0.334, "grad_norm": 1.1881402793874947, "kl": 0.251953125, "learning_rate": 1.5163974616389618e-06, "loss": 0.0025246115401387215, "memory(GiB)": 146.12, "reward": 1.7349523305892944, "reward_std": 0.06460803747177124, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4099797010421753, "rewards/EvidenceHallucination/std": 0.4215739667415619, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.44500061869621277, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5196231603622437, "rewards/VideoAccuracy/std": 0.5055806040763855, "step": 334, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 913.0, "completions/mean_length": 506.3095397949219, "completions/min_length": 350.0, "entropy/max": 1.65625, "entropy/mean": 0.5234375, "entropy/min": 0.1611328125, "epoch": 0.335, "grad_norm": 1.016418452628076, "kl": 0.2236328125, "learning_rate": 1.5136773915734064e-06, "loss": 0.002261554356664419, "memory(GiB)": 146.12, "reward": 1.7054145336151123, "reward_std": 0.10020820796489716, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32015758752822876, "rewards/EvidenceHallucination/std": 0.3698805868625641, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.7970489263534546, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5747164487838745, "rewards/VideoAccuracy/std": 0.4819638133049011, "step": 335, "train_speed(iter/s)": 0.018439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.0, "completions/mean_length": 431.9761962890625, "completions/min_length": 318.0, "entropy/max": 0.68359375, "entropy/mean": 0.478515625, "entropy/min": 0.34765625, "epoch": 0.336, "grad_norm": 1.3505539461602332, "kl": 0.25390625, "learning_rate": 1.5109521487822206e-06, "loss": 0.00255573564209044, "memory(GiB)": 146.12, "reward": 1.6756770610809326, "reward_std": 0.17967942357063293, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4279189109802246, "rewards/EvidenceHallucination/std": 0.42436590790748596, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.5037605166435242, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.5662835240364075, "rewards/VideoAccuracy/std": 0.449936181306839, "step": 336, "train_speed(iter/s)": 0.018441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 551.0, "completions/mean_length": 421.21429443359375, "completions/min_length": 294.0, "entropy/max": 0.58203125, "entropy/mean": 0.453125, "entropy/min": 0.359375, "epoch": 0.337, "grad_norm": 1.5515733832419425, "kl": 0.275390625, "learning_rate": 1.508221760708569e-06, "loss": 0.0027671372517943382, "memory(GiB)": 146.12, "reward": 1.931458592414856, "reward_std": 0.28220516443252563, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4907691776752472, "rewards/EvidenceHallucination/std": 0.40087586641311646, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.5823577642440796, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.7333047389984131, "rewards/VideoAccuracy/std": 0.5154536962509155, "step": 337, "train_speed(iter/s)": 0.018447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/mean_length": 481.69049072265625, "completions/min_length": 356.0, "entropy/max": 0.5859375, "entropy/mean": 0.36328125, "entropy/min": 0.1474609375, "epoch": 0.338, "grad_norm": 1.1877824818529439, "kl": 0.2041015625, "learning_rate": 1.5054862548474297e-06, "loss": 0.0020632906816899776, "memory(GiB)": 146.12, "reward": 1.9895719289779663, "reward_std": 0.166742205619812, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4769146740436554, "rewards/EvidenceHallucination/std": 0.34906309843063354, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.7650920152664185, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7608555555343628, "rewards/VideoAccuracy/std": 0.3460628092288971, "step": 338, "train_speed(iter/s)": 0.018438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/mean_length": 427.5714416503906, "completions/min_length": 296.0, "entropy/max": 0.82421875, "entropy/mean": 0.5, "entropy/min": 0.302734375, "epoch": 0.339, "grad_norm": 1.2548089464891523, "kl": 0.275390625, "learning_rate": 1.5027456587453158e-06, "loss": 0.0027769131120294333, "memory(GiB)": 146.12, "reward": 1.5119894742965698, "reward_std": 0.31302163004875183, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3102223873138428, "rewards/EvidenceHallucination/std": 0.3759114146232605, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.592735767364502, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.4213736355304718, "rewards/VideoAccuracy/std": 0.4750100076198578, "step": 339, "train_speed(iter/s)": 0.018433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 567.0, "completions/mean_length": 436.0238037109375, "completions/min_length": 277.0, "entropy/max": 0.640625, "entropy/mean": 0.4375, "entropy/min": 0.2333984375, "epoch": 0.34, "grad_norm": 1.384410392745363, "kl": 0.259765625, "learning_rate": 1.5e-06, "loss": 0.002623537089675665, "memory(GiB)": 146.12, "reward": 1.6347346305847168, "reward_std": 0.3242671489715576, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4092910587787628, "rewards/EvidenceHallucination/std": 0.47143739461898804, "rewards/Evidence_Num_Record/mean": 3.095238208770752, "rewards/Evidence_Num_Record/std": 0.6555401086807251, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.4909716248512268, "rewards/VideoAccuracy/std": 0.4699263572692871, "step": 340, "train_speed(iter/s)": 0.018442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/mean_length": 433.452392578125, "completions/min_length": 274.0, "entropy/max": 0.90625, "entropy/mean": 0.373046875, "entropy/min": 0.1474609375, "epoch": 0.341, "grad_norm": 1.4876821617864908, "kl": 0.234375, "learning_rate": 1.4972493062602354e-06, "loss": 0.0023775191511958838, "memory(GiB)": 146.12, "reward": 1.7248107194900513, "reward_std": 0.27018630504608154, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24774467945098877, "rewards/EvidenceHallucination/std": 0.3780077397823334, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.8570944666862488, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.4847853481769562, "rewards/VideoAccuracy/std": 0.438723087310791, "step": 341, "train_speed(iter/s)": 0.018437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.0, "completions/mean_length": 453.69049072265625, "completions/min_length": 278.0, "entropy/max": 0.6484375, "entropy/mean": 0.52734375, "entropy/min": 0.400390625, "epoch": 0.342, "grad_norm": 1.1801274023177015, "kl": 0.255859375, "learning_rate": 1.4944936052254768e-06, "loss": 0.002568014431744814, "memory(GiB)": 146.12, "reward": 1.3495653867721558, "reward_std": 0.19368955492973328, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2002081274986267, "rewards/EvidenceHallucination/std": 0.32005855441093445, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.7669872641563416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3095238208770752, "rewards/VideoAccuracy/std": 0.4679011106491089, "step": 342, "train_speed(iter/s)": 0.018432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 546.0, "completions/mean_length": 399.8333435058594, "completions/min_length": 298.0, "entropy/max": 0.6171875, "entropy/mean": 0.447265625, "entropy/min": 0.2890625, "epoch": 0.343, "grad_norm": 1.288550888438917, "kl": 0.263671875, "learning_rate": 1.491732924645604e-06, "loss": 0.002660168334841728, "memory(GiB)": 146.12, "reward": 1.5326708555221558, "reward_std": 0.11299590766429901, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3803858458995819, "rewards/EvidenceHallucination/std": 0.4688884913921356, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.5372316837310791, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.39945095777511597, "rewards/VideoAccuracy/std": 0.46400779485702515, "step": 343, "train_speed(iter/s)": 0.01843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 592.0, "completions/mean_length": 442.3809509277344, "completions/min_length": 267.0, "entropy/max": 0.5390625, "entropy/mean": 0.42578125, "entropy/min": 0.1796875, "epoch": 0.344, "grad_norm": 1.2768522713214825, "kl": 0.287109375, "learning_rate": 1.4889672923206388e-06, "loss": 0.0028895996510982513, "memory(GiB)": 146.12, "reward": 2.0022213459014893, "reward_std": 0.03883177042007446, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5681445002555847, "rewards/EvidenceHallucination/std": 0.41701605916023254, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.6325473189353943, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.721925675868988, "rewards/VideoAccuracy/std": 0.512051522731781, "step": 344, "train_speed(iter/s)": 0.018425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 703.0, "completions/mean_length": 450.5, "completions/min_length": 318.0, "entropy/max": 1.8203125, "entropy/mean": 0.49609375, "entropy/min": 0.109375, "epoch": 0.345, "grad_norm": 1.1776170050554418, "kl": 0.2392578125, "learning_rate": 1.4861967361004686e-06, "loss": 0.002416463103145361, "memory(GiB)": 146.12, "reward": 1.768653154373169, "reward_std": 0.19547991454601288, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4626978933811188, "rewards/EvidenceHallucination/std": 0.39814016222953796, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.1168646812438965, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.60944664478302, "rewards/VideoAccuracy/std": 0.4475680887699127, "step": 345, "train_speed(iter/s)": 0.01843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/mean_length": 382.7857360839844, "completions/min_length": 185.0, "entropy/max": 0.6484375, "entropy/mean": 0.4453125, "entropy/min": 0.287109375, "epoch": 0.346, "grad_norm": 1.6279893978244946, "kl": 0.267578125, "learning_rate": 1.4834212838845636e-06, "loss": 0.0027270361315459013, "memory(GiB)": 146.12, "reward": 1.7766560316085815, "reward_std": 0.3405471742153168, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5174556374549866, "rewards/EvidenceHallucination/std": 0.40616753697395325, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.5868279337882996, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.6255457997322083, "rewards/VideoAccuracy/std": 0.4321475327014923, "step": 346, "train_speed(iter/s)": 0.018437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 516.0, "completions/mean_length": 401.952392578125, "completions/min_length": 297.0, "entropy/max": 0.51953125, "entropy/mean": 0.443359375, "entropy/min": 0.283203125, "epoch": 0.347, "grad_norm": 1.4556253563734596, "kl": 0.283203125, "learning_rate": 1.4806409636216973e-06, "loss": 0.002829810604453087, "memory(GiB)": 146.12, "reward": 1.8094658851623535, "reward_std": 0.3720260560512543, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4109620749950409, "rewards/EvidenceHallucination/std": 0.42193925380706787, "rewards/Evidence_Num_Record/mean": 3.1666667461395264, "rewards/Evidence_Num_Record/std": 0.5372316837310791, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6272733807563782, "rewards/VideoAccuracy/std": 0.5630853772163391, "step": 347, "train_speed(iter/s)": 0.018433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 479.5714416503906, "completions/min_length": 255.0, "entropy/max": 0.7578125, "entropy/mean": 0.3515625, "entropy/min": 0.126953125, "epoch": 0.348, "grad_norm": 1.2545353061090596, "kl": 0.1962890625, "learning_rate": 1.4778558033096631e-06, "loss": 0.0021742568351328373, "memory(GiB)": 146.12, "reward": 1.7992125749588013, "reward_std": 0.30752599239349365, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.3265472948551178, "rewards/EvidenceHallucination/std": 0.43388867378234863, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6291413307189941, "rewards/VideoAccuracy/std": 0.40418609976768494, "step": 348, "train_speed(iter/s)": 0.018405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 573.0, "completions/mean_length": 394.3333435058594, "completions/min_length": 241.0, "entropy/max": 0.640625, "entropy/mean": 0.47265625, "entropy/min": 0.279296875, "epoch": 0.349, "grad_norm": 1.5412409720252533, "kl": 0.294921875, "learning_rate": 1.475065830994995e-06, "loss": 0.0029690172523260117, "memory(GiB)": 146.12, "reward": 1.7570728063583374, "reward_std": 0.12371524423360825, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5494441390037537, "rewards/EvidenceHallucination/std": 0.4043802320957184, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.5328903794288635, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.6186124682426453, "rewards/VideoAccuracy/std": 0.4527876675128937, "step": 349, "train_speed(iter/s)": 0.018413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 539.0, "completions/mean_length": 397.8095397949219, "completions/min_length": 273.0, "entropy/max": 0.6796875, "entropy/mean": 0.44140625, "entropy/min": 0.330078125, "epoch": 0.35, "grad_norm": 1.4894202446535798, "kl": 0.2734375, "learning_rate": 1.4722710747726827e-06, "loss": 0.002753614215180278, "memory(GiB)": 146.12, "reward": 1.7065989971160889, "reward_std": 0.36027830839157104, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42857760190963745, "rewards/EvidenceHallucination/std": 0.4263991713523865, "rewards/Evidence_Num_Record/mean": 3.2857143878936768, "rewards/Evidence_Num_Record/std": 0.6730242371559143, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.563740611076355, "rewards/VideoAccuracy/std": 0.5110309720039368, "step": 350, "train_speed(iter/s)": 0.018423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 740.0, "completions/mean_length": 457.26190185546875, "completions/min_length": 300.0, "entropy/max": 0.875, "entropy/mean": 0.37890625, "entropy/min": 0.08740234375, "epoch": 0.351, "grad_norm": 1.3644218811234954, "kl": 0.2255859375, "learning_rate": 1.4694715627858908e-06, "loss": 0.002268692012876272, "memory(GiB)": 146.12, "reward": 2.1510634422302246, "reward_std": 0.22360464930534363, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4802687466144562, "rewards/EvidenceHallucination/std": 0.3883917033672333, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.7948732376098633, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.8645331859588623, "rewards/VideoAccuracy/std": 0.33970150351524353, "step": 351, "train_speed(iter/s)": 0.018436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 547.0, "completions/mean_length": 436.8571472167969, "completions/min_length": 341.0, "entropy/max": 0.69140625, "entropy/mean": 0.49609375, "entropy/min": 0.353515625, "epoch": 0.352, "grad_norm": 1.3208956824327278, "kl": 0.2734375, "learning_rate": 1.4666673232256737e-06, "loss": 0.0027419989928603172, "memory(GiB)": 146.12, "reward": 1.7679606676101685, "reward_std": 0.24578127264976501, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5064693689346313, "rewards/EvidenceHallucination/std": 0.3789246678352356, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.6228330731391907, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 352, "train_speed(iter/s)": 0.018404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 553.0, "completions/mean_length": 401.7857360839844, "completions/min_length": 260.0, "entropy/max": 0.57421875, "entropy/mean": 0.46875, "entropy/min": 0.353515625, "epoch": 0.353, "grad_norm": 1.600266503025139, "kl": 0.271484375, "learning_rate": 1.4638583843306926e-06, "loss": 0.0027310168370604515, "memory(GiB)": 146.12, "reward": 1.419572114944458, "reward_std": 0.20986318588256836, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.18792571127414703, "rewards/EvidenceHallucination/std": 0.3676707148551941, "rewards/Evidence_Num_Record/mean": 3.190476179122925, "rewards/Evidence_Num_Record/std": 0.5054867267608643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.3200821578502655, "rewards/VideoAccuracy/std": 0.4034067690372467, "step": 353, "train_speed(iter/s)": 0.018405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 435.90478515625, "completions/min_length": 273.0, "entropy/max": 0.53125, "entropy/mean": 0.404296875, "entropy/min": 0.201171875, "epoch": 0.354, "grad_norm": 1.24190944601256, "kl": 0.27734375, "learning_rate": 1.4610447743869313e-06, "loss": 0.0027868878096342087, "memory(GiB)": 146.12, "reward": 2.0171663761138916, "reward_std": 0.1643851101398468, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4571863114833832, "rewards/EvidenceHallucination/std": 0.4068146049976349, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.8006965517997742, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.7685860991477966, "rewards/VideoAccuracy/std": 0.5048638582229614, "step": 354, "train_speed(iter/s)": 0.018414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 712.0, "completions/mean_length": 472.5238037109375, "completions/min_length": 285.0, "entropy/max": 1.5859375, "entropy/mean": 0.46484375, "entropy/min": 0.1982421875, "epoch": 0.355, "grad_norm": 1.25497763523173, "kl": 0.2275390625, "learning_rate": 1.4582265217274103e-06, "loss": 0.002319670282304287, "memory(GiB)": 146.12, "reward": 1.831558108329773, "reward_std": 0.24632790684700012, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.32749611139297485, "rewards/EvidenceHallucination/std": 0.32687005400657654, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.9106416702270508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.7160588502883911, "rewards/VideoAccuracy/std": 0.4168870151042938, "step": 355, "train_speed(iter/s)": 0.01841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 551.0, "completions/mean_length": 414.4761962890625, "completions/min_length": 238.0, "entropy/max": 0.6328125, "entropy/mean": 0.45703125, "entropy/min": 0.27734375, "epoch": 0.356, "grad_norm": 1.2905006201438967, "kl": 0.2734375, "learning_rate": 1.4554036547319032e-06, "loss": 0.0027824200224131346, "memory(GiB)": 146.12, "reward": 1.5569792985916138, "reward_std": 0.2874768376350403, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3323138952255249, "rewards/EvidenceHallucination/std": 0.4390609562397003, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.5823577642440796, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.4286118447780609, "rewards/VideoAccuracy/std": 0.42774662375450134, "step": 356, "train_speed(iter/s)": 0.01841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 544.0, "completions/mean_length": 408.6428527832031, "completions/min_length": 325.0, "entropy/max": 0.6015625, "entropy/mean": 0.45703125, "entropy/min": 0.349609375, "epoch": 0.357, "grad_norm": 1.4640627839195135, "kl": 0.296875, "learning_rate": 1.4525762018266483e-06, "loss": 0.0029886546544730663, "memory(GiB)": 146.12, "reward": 1.5981336832046509, "reward_std": 0.26221147179603577, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2828736901283264, "rewards/EvidenceHallucination/std": 0.40860292315483093, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.47711870074272156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.4415588974952698, "rewards/VideoAccuracy/std": 0.4583386778831482, "step": 357, "train_speed(iter/s)": 0.018429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 767.0, "completions/mean_length": 441.3095397949219, "completions/min_length": 238.0, "entropy/max": 0.62890625, "entropy/mean": 0.359375, "entropy/min": 0.076171875, "epoch": 0.358, "grad_norm": 1.0219281720960793, "kl": 0.21875, "learning_rate": 1.4497441914840657e-06, "loss": 0.00220364797860384, "memory(GiB)": 146.12, "reward": 2.045262098312378, "reward_std": 0.19890999794006348, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44825780391693115, "rewards/EvidenceHallucination/std": 0.3905707001686096, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.7981540560722351, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8222770690917969, "rewards/VideoAccuracy/std": 0.4843039810657501, "step": 358, "train_speed(iter/s)": 0.01838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 571.0, "completions/mean_length": 430.5714416503906, "completions/min_length": 291.0, "entropy/max": 0.81640625, "entropy/mean": 0.455078125, "entropy/min": 0.330078125, "epoch": 0.359, "grad_norm": 1.3777633606416004, "kl": 0.279296875, "learning_rate": 1.4469076522224682e-06, "loss": 0.0028131790459156036, "memory(GiB)": 146.12, "reward": 1.8447340726852417, "reward_std": 0.18595537543296814, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.556718647480011, "rewards/EvidenceHallucination/std": 0.36465418338775635, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.8570944666862488, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.7048189043998718, "rewards/VideoAccuracy/std": 0.41710811853408813, "step": 359, "train_speed(iter/s)": 0.018383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 545.0, "completions/mean_length": 396.3333435058594, "completions/min_length": 77.0, "entropy/max": 0.62109375, "entropy/mean": 0.44921875, "entropy/min": 0.310546875, "epoch": 0.36, "grad_norm": 1.54607411816935, "kl": 0.265625, "learning_rate": 1.4440666126057741e-06, "loss": 0.0026596831157803535, "memory(GiB)": 146.12, "reward": 1.447451114654541, "reward_std": 0.31365180015563965, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.28275731205940247, "rewards/EvidenceHallucination/std": 0.4097527265548706, "rewards/Evidence_Num_Record/mean": 3.095238208770752, "rewards/Evidence_Num_Record/std": 0.7261500358581543, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.355185329914093, "rewards/VideoAccuracy/std": 0.3081769049167633, "step": 360, "train_speed(iter/s)": 0.018387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/mean_length": 434.19049072265625, "completions/min_length": 296.0, "entropy/max": 0.49609375, "entropy/mean": 0.333984375, "entropy/min": 0.154296875, "epoch": 0.361, "grad_norm": 1.3485145572791253, "kl": 0.2421875, "learning_rate": 1.4412211012432211e-06, "loss": 0.002424489473924041, "memory(GiB)": 146.12, "reward": 2.1414265632629395, "reward_std": 0.09960927814245224, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42047956585884094, "rewards/EvidenceHallucination/std": 0.4049188792705536, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.49679577350616455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8620923757553101, "rewards/VideoAccuracy/std": 0.4532416760921478, "step": 361, "train_speed(iter/s)": 0.018388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1192.0, "completions/mean_length": 459.5952453613281, "completions/min_length": 198.0, "entropy/max": 0.9140625, "entropy/mean": 0.482421875, "entropy/min": 0.22265625, "epoch": 0.362, "grad_norm": 1.4207204595844491, "kl": 0.24609375, "learning_rate": 1.4383711467890773e-06, "loss": 0.002514174208045006, "memory(GiB)": 146.12, "reward": 1.8118293285369873, "reward_std": 0.18444190919399261, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48771950602531433, "rewards/EvidenceHallucination/std": 0.3601215183734894, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 2.155402660369873, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7142857313156128, "rewards/VideoAccuracy/std": 0.45722997188568115, "step": 362, "train_speed(iter/s)": 0.018387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 701.0, "completions/mean_length": 408.0714416503906, "completions/min_length": 306.0, "entropy/max": 0.5625, "entropy/mean": 0.4453125, "entropy/min": 0.265625, "epoch": 0.363, "grad_norm": 1.4738279134737888, "kl": 0.279296875, "learning_rate": 1.4355167779423524e-06, "loss": 0.0028174584731459618, "memory(GiB)": 146.12, "reward": 1.5743744373321533, "reward_std": 0.2246859073638916, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.26714199781417847, "rewards/EvidenceHallucination/std": 0.3937433660030365, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.46380311250686646, "rewards/VideoAccuracy/std": 0.3977683186531067, "step": 363, "train_speed(iter/s)": 0.018393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 519.0, "completions/mean_length": 406.0476379394531, "completions/min_length": 270.0, "entropy/max": 0.47265625, "entropy/mean": 0.396484375, "entropy/min": 0.2373046875, "epoch": 0.364, "grad_norm": 1.3102302939531227, "kl": 0.275390625, "learning_rate": 1.4326580234465083e-06, "loss": 0.002773560583591461, "memory(GiB)": 146.12, "reward": 2.0157859325408936, "reward_std": 0.21028606593608856, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3226657509803772, "rewards/EvidenceHallucination/std": 0.4021158814430237, "rewards/Evidence_Num_Record/mean": 3.261904716491699, "rewards/Evidence_Num_Record/std": 0.49679577350616455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7845862507820129, "rewards/VideoAccuracy/std": 0.5455461740493774, "step": 364, "train_speed(iter/s)": 0.018395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1009.0, "completions/mean_length": 495.5714416503906, "completions/min_length": 293.0, "entropy/max": 1.1484375, "entropy/mean": 0.47265625, "entropy/min": 0.1728515625, "epoch": 0.365, "grad_norm": 1.2134787504452507, "kl": 0.2158203125, "learning_rate": 1.4297949120891716e-06, "loss": 0.002195878652855754, "memory(GiB)": 146.12, "reward": 1.5721949338912964, "reward_std": 0.3115766644477844, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1513204425573349, "rewards/EvidenceHallucination/std": 0.2872285544872284, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.890129566192627, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.47526416182518005, "rewards/VideoAccuracy/std": 0.5100093483924866, "step": 365, "train_speed(iter/s)": 0.018384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 594.0, "completions/mean_length": 428.8333435058594, "completions/min_length": 260.0, "entropy/max": 0.72265625, "entropy/mean": 0.45703125, "entropy/min": 0.30078125, "epoch": 0.366, "grad_norm": 1.4126048684373906, "kl": 0.27734375, "learning_rate": 1.4269274727018417e-06, "loss": 0.002791311126202345, "memory(GiB)": 146.12, "reward": 1.5688774585723877, "reward_std": 0.2685014009475708, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29032036662101746, "rewards/EvidenceHallucination/std": 0.3536563515663147, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.8981204032897949, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108052015304565, "rewards/VideoAccuracy/mean": 0.4631943106651306, "rewards/VideoAccuracy/std": 0.43064507842063904, "step": 366, "train_speed(iter/s)": 0.018385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 634.0, "completions/mean_length": 449.9761962890625, "completions/min_length": 330.0, "entropy/max": 0.55078125, "entropy/mean": 0.412109375, "entropy/min": 0.314453125, "epoch": 0.367, "grad_norm": 1.462879261342147, "kl": 0.28515625, "learning_rate": 1.4240557341596018e-06, "loss": 0.0028515085577964783, "memory(GiB)": 146.12, "reward": 1.756030559539795, "reward_std": 0.29996487498283386, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37230175733566284, "rewards/EvidenceHallucination/std": 0.39328497648239136, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.6270147562026978, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5863322019577026, "rewards/VideoAccuracy/std": 0.5068567991256714, "step": 367, "train_speed(iter/s)": 0.01838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/mean_length": 501.3095397949219, "completions/min_length": 292.0, "entropy/max": 0.83984375, "entropy/mean": 0.357421875, "entropy/min": 0.1416015625, "epoch": 0.368, "grad_norm": 1.313619431119182, "kl": 0.21875, "learning_rate": 1.4211797253808267e-06, "loss": 0.002218235284090042, "memory(GiB)": 146.12, "reward": 1.8827509880065918, "reward_std": 0.2703996002674103, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2761436700820923, "rewards/EvidenceHallucination/std": 0.40353676676750183, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 0.8968262672424316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6989507079124451, "rewards/VideoAccuracy/std": 0.5381085872650146, "step": 368, "train_speed(iter/s)": 0.018374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 798.0, "completions/mean_length": 452.2857360839844, "completions/min_length": 305.0, "entropy/max": 1.203125, "entropy/mean": 0.4609375, "entropy/min": 0.291015625, "epoch": 0.369, "grad_norm": 1.525129645684774, "kl": 0.26171875, "learning_rate": 1.4182994753268926e-06, "loss": 0.0026336682494729757, "memory(GiB)": 146.12, "reward": 1.810465931892395, "reward_std": 0.322182834148407, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5285199284553528, "rewards/EvidenceHallucination/std": 0.3964601457118988, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.1559572219848633, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.6761904954910278, "rewards/VideoAccuracy/std": 0.45468270778656006, "step": 369, "train_speed(iter/s)": 0.018374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 759.0, "completions/mean_length": 450.19049072265625, "completions/min_length": 281.0, "entropy/max": 0.56640625, "entropy/mean": 0.40625, "entropy/min": 0.291015625, "epoch": 0.37, "grad_norm": 1.34759416208457, "kl": 0.26171875, "learning_rate": 1.4154150130018865e-06, "loss": 0.002618623897433281, "memory(GiB)": 146.12, "reward": 1.4079593420028687, "reward_std": 0.1855965107679367, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19020766019821167, "rewards/EvidenceHallucination/std": 0.3501608073711395, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.2211835384368896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.3080129325389862, "rewards/VideoAccuracy/std": 0.49778202176094055, "step": 370, "train_speed(iter/s)": 0.018392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 995.0, "completions/mean_length": 527.6666870117188, "completions/min_length": 323.0, "entropy/max": 0.60546875, "entropy/mean": 0.34375, "entropy/min": 0.12890625, "epoch": 0.371, "grad_norm": 1.1374302781371923, "kl": 0.2216796875, "learning_rate": 1.4125263674523112e-06, "loss": 0.002231322694569826, "memory(GiB)": 146.12, "reward": 2.184678316116333, "reward_std": 0.33372053503990173, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4899149537086487, "rewards/EvidenceHallucination/std": 0.42629364132881165, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.1716750860214233, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8866953253746033, "rewards/VideoAccuracy/std": 0.4119216501712799, "step": 371, "train_speed(iter/s)": 0.018389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 934.0, "completions/mean_length": 519.1666870117188, "completions/min_length": 362.0, "entropy/max": 1.6640625, "entropy/mean": 0.61328125, "entropy/min": 0.341796875, "epoch": 0.372, "grad_norm": 1.1000883432113129, "kl": 0.255859375, "learning_rate": 1.4096335677667951e-06, "loss": 0.0026076710782945156, "memory(GiB)": 146.12, "reward": 1.547003149986267, "reward_std": 0.36151957511901855, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35406267642974854, "rewards/EvidenceHallucination/std": 0.3867344856262207, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.3445377349853516, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4761904776096344, "rewards/VideoAccuracy/std": 0.5054867267608643, "step": 372, "train_speed(iter/s)": 0.018387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 804.0, "completions/mean_length": 466.0476379394531, "completions/min_length": 332.0, "entropy/max": 0.5703125, "entropy/mean": 0.4375, "entropy/min": 0.302734375, "epoch": 0.373, "grad_norm": 1.2749642753472195, "kl": 0.25390625, "learning_rate": 1.4067366430758004e-06, "loss": 0.002570125972852111, "memory(GiB)": 146.12, "reward": 1.4805047512054443, "reward_std": 0.2484472692012787, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3461710214614868, "rewards/EvidenceHallucination/std": 0.45463377237319946, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 1.2087563276290894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.35412758588790894, "rewards/VideoAccuracy/std": 0.34480535984039307, "step": 373, "train_speed(iter/s)": 0.018394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 794.0, "completions/mean_length": 484.3809509277344, "completions/min_length": 326.0, "entropy/max": 0.48046875, "entropy/mean": 0.369140625, "entropy/min": 0.1787109375, "epoch": 0.374, "grad_norm": 1.1626635492438668, "kl": 0.26171875, "learning_rate": 1.403835622551325e-06, "loss": 0.00264472677372396, "memory(GiB)": 146.12, "reward": 2.1501784324645996, "reward_std": 0.19828462600708008, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5695993304252625, "rewards/EvidenceHallucination/std": 0.3984956741333008, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.0169869661331177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.879115641117096, "rewards/VideoAccuracy/std": 0.5946195125579834, "step": 374, "train_speed(iter/s)": 0.018399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 666.6666870117188, "completions/min_length": 344.0, "entropy/max": 0.68359375, "entropy/mean": 0.37890625, "entropy/min": 0.11572265625, "epoch": 0.375, "grad_norm": 1.171237322592966, "kl": 0.1982421875, "learning_rate": 1.4009305354066136e-06, "loss": 0.0020399591885507107, "memory(GiB)": 146.12, "reward": 1.7457466125488281, "reward_std": 0.4636306166648865, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3744157552719116, "rewards/EvidenceHallucination/std": 0.35868147015571594, "rewards/Evidence_Num_Record/mean": 5.761904716491699, "rewards/Evidence_Num_Record/std": 2.3038623332977295, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6161016821861267, "rewards/VideoAccuracy/std": 0.46276232600212097, "step": 375, "train_speed(iter/s)": 0.018389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 963.0, "completions/mean_length": 457.3571472167969, "completions/min_length": 332.0, "entropy/max": 0.6640625, "entropy/mean": 0.41796875, "entropy/min": 0.302734375, "epoch": 0.376, "grad_norm": 1.4800288015731955, "kl": 0.265625, "learning_rate": 1.3980214108958624e-06, "loss": 0.0026926174759864807, "memory(GiB)": 146.12, "reward": 1.857588529586792, "reward_std": 0.12305565923452377, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5636403560638428, "rewards/EvidenceHallucination/std": 0.43467506766319275, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.3216679096221924, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6900984644889832, "rewards/VideoAccuracy/std": 0.4148041009902954, "step": 376, "train_speed(iter/s)": 0.018393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 785.0, "completions/mean_length": 458.1428527832031, "completions/min_length": 302.0, "entropy/max": 0.51953125, "entropy/mean": 0.38671875, "entropy/min": 0.26953125, "epoch": 0.377, "grad_norm": 1.189318645739725, "kl": 0.259765625, "learning_rate": 1.3951082783139218e-06, "loss": 0.002627232577651739, "memory(GiB)": 146.12, "reward": 1.6650316715240479, "reward_std": 0.19645211100578308, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28568822145462036, "rewards/EvidenceHallucination/std": 0.39984413981437683, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.214507818222046, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.5197986364364624, "rewards/VideoAccuracy/std": 0.5648055672645569, "step": 377, "train_speed(iter/s)": 0.018361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 889.0, "completions/mean_length": 494.0476379394531, "completions/min_length": 300.0, "entropy/max": 0.67578125, "entropy/mean": 0.353515625, "entropy/min": 0.12890625, "epoch": 0.378, "grad_norm": 1.054499643623318, "kl": 0.2021484375, "learning_rate": 1.3921911669960054e-06, "loss": 0.002041890984401107, "memory(GiB)": 146.12, "reward": 2.0080480575561523, "reward_std": 0.15570762753486633, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.555682361125946, "rewards/EvidenceHallucination/std": 0.3886148929595947, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.6792786121368408, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7635781764984131, "rewards/VideoAccuracy/std": 0.44547799229621887, "step": 378, "train_speed(iter/s)": 0.018362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1564.0, "completions/mean_length": 526.7857055664062, "completions/min_length": 275.0, "entropy/max": 1.8359375, "entropy/mean": 0.498046875, "entropy/min": 0.265625, "epoch": 0.379, "grad_norm": 1.4223091111930677, "kl": 0.255859375, "learning_rate": 1.3892701063173915e-06, "loss": 0.002596626989543438, "memory(GiB)": 146.12, "reward": 1.986168622970581, "reward_std": 0.16616535186767578, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6691222786903381, "rewards/EvidenceHallucination/std": 0.3635716438293457, "rewards/Evidence_Num_Record/mean": 5.595238208770752, "rewards/Evidence_Num_Record/std": 3.7158825397491455, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8190109133720398, "rewards/VideoAccuracy/std": 0.34908995032310486, "step": 379, "train_speed(iter/s)": 0.018349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 743.0, "completions/mean_length": 432.66668701171875, "completions/min_length": 337.0, "entropy/max": 0.50390625, "entropy/mean": 0.396484375, "entropy/min": 0.2431640625, "epoch": 0.38, "grad_norm": 1.4051032258402398, "kl": 0.279296875, "learning_rate": 1.3863451256931284e-06, "loss": 0.002808385295793414, "memory(GiB)": 146.12, "reward": 1.8036223649978638, "reward_std": 0.37080907821655273, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5079375505447388, "rewards/EvidenceHallucination/std": 0.4352448582649231, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.9093654155731201, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6353679895401001, "rewards/VideoAccuracy/std": 0.42980101704597473, "step": 380, "train_speed(iter/s)": 0.018353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/mean_length": 514.8809814453125, "completions/min_length": 346.0, "entropy/max": 0.62890625, "entropy/mean": 0.306640625, "entropy/min": 0.12255859375, "epoch": 0.381, "grad_norm": 1.130339014282929, "kl": 0.21875, "learning_rate": 1.3834162545777392e-06, "loss": 0.0022574281319975853, "memory(GiB)": 146.12, "reward": 2.094353437423706, "reward_std": 0.08381448686122894, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2632342278957367, "rewards/EvidenceHallucination/std": 0.3826342523097992, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 2.3587875366210938, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8417066931724548, "rewards/VideoAccuracy/std": 0.4787370562553406, "step": 381, "train_speed(iter/s)": 0.01835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 982.0, "completions/mean_length": 588.3809814453125, "completions/min_length": 370.0, "entropy/max": 0.625, "entropy/mean": 0.453125, "entropy/min": 0.2890625, "epoch": 0.382, "grad_norm": 1.161291712057949, "kl": 0.255859375, "learning_rate": 1.380483522464923e-06, "loss": 0.0025750526692718267, "memory(GiB)": 146.12, "reward": 1.6367465257644653, "reward_std": 0.2806619107723236, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.445637583732605, "rewards/EvidenceHallucination/std": 0.419691801071167, "rewards/Evidence_Num_Record/mean": 5.809524059295654, "rewards/Evidence_Num_Record/std": 1.8771811723709106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5476190447807312, "rewards/VideoAccuracy/std": 0.503760576248169, "step": 382, "train_speed(iter/s)": 0.018335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1099.0, "completions/mean_length": 546.5, "completions/min_length": 367.0, "entropy/max": 0.49609375, "entropy/mean": 0.412109375, "entropy/min": 0.28515625, "epoch": 0.383, "grad_norm": 1.1662744808643895, "kl": 0.263671875, "learning_rate": 1.3775469588872598e-06, "loss": 0.00264447252266109, "memory(GiB)": 146.12, "reward": 1.3630290031433105, "reward_std": 0.3358883559703827, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19906720519065857, "rewards/EvidenceHallucination/std": 0.36768069863319397, "rewards/Evidence_Num_Record/mean": 5.11904764175415, "rewards/Evidence_Num_Record/std": 2.329056978225708, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.27321547269821167, "rewards/VideoAccuracy/std": 0.3392495810985565, "step": 383, "train_speed(iter/s)": 0.018337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 807.0, "completions/mean_length": 485.1190490722656, "completions/min_length": 318.0, "entropy/max": 0.490234375, "entropy/mean": 0.361328125, "entropy/min": 0.1796875, "epoch": 0.384, "grad_norm": 1.2736120355554539, "kl": 0.2578125, "learning_rate": 1.374606593415912e-06, "loss": 0.0026027606800198555, "memory(GiB)": 146.12, "reward": 2.415849447250366, "reward_std": 0.162336528301239, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5832903385162354, "rewards/EvidenceHallucination/std": 0.3543720543384552, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.2010449171066284, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 1.1325247287750244, "rewards/VideoAccuracy/std": 0.29747867584228516, "step": 384, "train_speed(iter/s)": 0.018336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 824.0, "completions/mean_length": 495.90478515625, "completions/min_length": 325.0, "entropy/max": 1.1953125, "entropy/mean": 0.419921875, "entropy/min": 0.1904296875, "epoch": 0.385, "grad_norm": 1.3107778840499427, "kl": 0.234375, "learning_rate": 1.3716624556603274e-06, "loss": 0.0023976964876055717, "memory(GiB)": 146.12, "reward": 2.003371238708496, "reward_std": 0.3091083765029907, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5313931703567505, "rewards/EvidenceHallucination/std": 0.3634487986564636, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.9906867742538452, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.7970927357673645, "rewards/VideoAccuracy/std": 0.3665767312049866, "step": 385, "train_speed(iter/s)": 0.01834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 779.0, "completions/mean_length": 496.452392578125, "completions/min_length": 348.0, "entropy/max": 0.59765625, "entropy/mean": 0.41796875, "entropy/min": 0.2890625, "epoch": 0.386, "grad_norm": 1.187078968146532, "kl": 0.259765625, "learning_rate": 1.3687145752679408e-06, "loss": 0.0026177032850682735, "memory(GiB)": 146.12, "reward": 1.79196298122406, "reward_std": 0.2241038978099823, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48707354068756104, "rewards/EvidenceHallucination/std": 0.41782069206237793, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.148902416229248, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.6326434016227722, "rewards/VideoAccuracy/std": 0.41035839915275574, "step": 386, "train_speed(iter/s)": 0.018337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 808.0, "completions/mean_length": 460.3333435058594, "completions/min_length": 297.0, "entropy/max": 0.52734375, "entropy/mean": 0.40625, "entropy/min": 0.23046875, "epoch": 0.387, "grad_norm": 1.3726077305695579, "kl": 0.259765625, "learning_rate": 1.3657629819238745e-06, "loss": 0.0026217461563646793, "memory(GiB)": 146.12, "reward": 1.9099597930908203, "reward_std": 0.43789178133010864, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4705699682235718, "rewards/EvidenceHallucination/std": 0.40831753611564636, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.234426736831665, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.7158457040786743, "rewards/VideoAccuracy/std": 0.55631422996521, "step": 387, "train_speed(iter/s)": 0.018342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 928.0, "completions/mean_length": 487.3095397949219, "completions/min_length": 289.0, "entropy/max": 0.74609375, "entropy/mean": 0.322265625, "entropy/min": 0.1796875, "epoch": 0.388, "grad_norm": 1.095365935281915, "kl": 0.22265625, "learning_rate": 1.3628077053506407e-06, "loss": 0.0022502231877297163, "memory(GiB)": 146.12, "reward": 2.0858771800994873, "reward_std": 0.3169812858104706, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48037728667259216, "rewards/EvidenceHallucination/std": 0.42832738161087036, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.938814401626587, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8231350183486938, "rewards/VideoAccuracy/std": 0.46208032965660095, "step": 388, "train_speed(iter/s)": 0.018334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 958.0, "completions/mean_length": 554.952392578125, "completions/min_length": 395.0, "entropy/max": 1.6796875, "entropy/mean": 0.435546875, "entropy/min": 0.287109375, "epoch": 0.389, "grad_norm": 1.1942709574591455, "kl": 0.259765625, "learning_rate": 1.3598487753078426e-06, "loss": 0.002639633370563388, "memory(GiB)": 146.12, "reward": 1.587154746055603, "reward_std": 0.2010001242160797, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.36558741331100464, "rewards/EvidenceHallucination/std": 0.40262478590011597, "rewards/Evidence_Num_Record/mean": 5.476190567016602, "rewards/Evidence_Num_Record/std": 1.8244690895080566, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.48546579480171204, "rewards/VideoAccuracy/std": 0.47066184878349304, "step": 389, "train_speed(iter/s)": 0.018334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 720.0, "completions/mean_length": 464.8333435058594, "completions/min_length": 323.0, "entropy/max": 0.546875, "entropy/mean": 0.431640625, "entropy/min": 0.296875, "epoch": 0.39, "grad_norm": 1.4047039030677193, "kl": 0.296875, "learning_rate": 1.3568862215918717e-06, "loss": 0.0029980493709445, "memory(GiB)": 146.12, "reward": 1.6332415342330933, "reward_std": 0.23186847567558289, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3746386468410492, "rewards/EvidenceHallucination/std": 0.4456513822078705, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.4618651866912842, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4916471838951111, "rewards/VideoAccuracy/std": 0.45274612307548523, "step": 390, "train_speed(iter/s)": 0.018337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 774.0, "completions/mean_length": 513.9285888671875, "completions/min_length": 364.0, "entropy/max": 0.490234375, "entropy/mean": 0.3203125, "entropy/min": 0.14453125, "epoch": 0.391, "grad_norm": 1.05864387239661, "kl": 0.2158203125, "learning_rate": 1.3539200740356119e-06, "loss": 0.002177801914513111, "memory(GiB)": 146.12, "reward": 2.3062353134155273, "reward_std": 0.14617519080638885, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5758527517318726, "rewards/EvidenceHallucination/std": 0.3537074327468872, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 1.3157228231430054, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9958263635635376, "rewards/VideoAccuracy/std": 0.3850289285182953, "step": 391, "train_speed(iter/s)": 0.018335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1077.0, "completions/mean_length": 559.7857055664062, "completions/min_length": 346.0, "entropy/max": 0.94921875, "entropy/mean": 0.486328125, "entropy/min": 0.291015625, "epoch": 0.392, "grad_norm": 1.2034454735474485, "kl": 0.263671875, "learning_rate": 1.3509503625081357e-06, "loss": 0.0026875040493905544, "memory(GiB)": 146.12, "reward": 1.6635810136795044, "reward_std": 0.2549924850463867, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4607619345188141, "rewards/EvidenceHallucination/std": 0.4197237193584442, "rewards/Evidence_Num_Record/mean": 5.6666669845581055, "rewards/Evidence_Num_Record/std": 1.8566515445709229, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5714285969734192, "rewards/VideoAccuracy/std": 0.5008702874183655, "step": 392, "train_speed(iter/s)": 0.018332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 816.0, "completions/mean_length": 532.6666870117188, "completions/min_length": 385.0, "entropy/max": 0.58984375, "entropy/mean": 0.447265625, "entropy/min": 0.333984375, "epoch": 0.393, "grad_norm": 1.3161624316795113, "kl": 0.271484375, "learning_rate": 1.347977116914405e-06, "loss": 0.0027165599167346954, "memory(GiB)": 146.12, "reward": 1.473059058189392, "reward_std": 0.38854551315307617, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23351234197616577, "rewards/EvidenceHallucination/std": 0.38533180952072144, "rewards/Evidence_Num_Record/mean": 4.928571701049805, "rewards/Evidence_Num_Record/std": 1.8398417234420776, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.3787374198436737, "rewards/VideoAccuracy/std": 0.34927859902381897, "step": 393, "train_speed(iter/s)": 0.018333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 664.0, "completions/mean_length": 492.69049072265625, "completions/min_length": 355.0, "entropy/max": 0.494140625, "entropy/mean": 0.359375, "entropy/min": 0.1953125, "epoch": 0.394, "grad_norm": 1.1185187742778906, "kl": 0.26953125, "learning_rate": 1.3450003671949705e-06, "loss": 0.0027246675454080105, "memory(GiB)": 146.12, "reward": 2.0985453128814697, "reward_std": 0.11049169301986694, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6033493280410767, "rewards/EvidenceHallucination/std": 0.43722671270370483, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.0155583620071411, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8112086653709412, "rewards/VideoAccuracy/std": 0.5243479013442993, "step": 394, "train_speed(iter/s)": 0.01834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 664.952392578125, "completions/min_length": 418.0, "entropy/max": 0.62890625, "entropy/mean": 0.34765625, "entropy/min": 0.0947265625, "epoch": 0.395, "grad_norm": 0.7729392430737445, "kl": 0.2109375, "learning_rate": 1.3420201433256689e-06, "loss": 0.0022313406225293875, "memory(GiB)": 146.12, "reward": 1.817870020866394, "reward_std": 0.1385781168937683, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34599390625953674, "rewards/EvidenceHallucination/std": 0.3843267858028412, "rewards/Evidence_Num_Record/mean": 5.976190567016602, "rewards/Evidence_Num_Record/std": 2.1582298278808594, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6939092874526978, "rewards/VideoAccuracy/std": 0.5304246544837952, "step": 395, "train_speed(iter/s)": 0.018326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 715.0, "completions/mean_length": 486.8333435058594, "completions/min_length": 342.0, "entropy/max": 0.5390625, "entropy/mean": 0.408203125, "entropy/min": 0.283203125, "epoch": 0.396, "grad_norm": 1.1781912522495772, "kl": 0.265625, "learning_rate": 1.3390364753173204e-06, "loss": 0.0026801545172929764, "memory(GiB)": 146.12, "reward": 1.6279311180114746, "reward_std": 0.1566619575023651, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37344738841056824, "rewards/EvidenceHallucination/std": 0.42502158880233765, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.086556077003479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.49133700132369995, "rewards/VideoAccuracy/std": 0.4137079119682312, "step": 396, "train_speed(iter/s)": 0.018329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/mean_length": 453.5952453613281, "completions/min_length": 318.0, "entropy/max": 0.5546875, "entropy/mean": 0.41796875, "entropy/min": 0.216796875, "epoch": 0.397, "grad_norm": 1.0415509961065834, "kl": 0.283203125, "learning_rate": 1.33604939321543e-06, "loss": 0.003073825966566801, "memory(GiB)": 146.12, "reward": 1.510471224784851, "reward_std": 0.15900374948978424, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29543644189834595, "rewards/EvidenceHallucination/std": 0.4081650674343109, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 0.5763435363769531, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.35138386487960815, "rewards/VideoAccuracy/std": 0.4781220853328705, "step": 397, "train_speed(iter/s)": 0.018335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 737.0, "completions/mean_length": 488.5714416503906, "completions/min_length": 368.0, "entropy/max": 1.0234375, "entropy/mean": 0.337890625, "entropy/min": 0.150390625, "epoch": 0.398, "grad_norm": 1.1410838956719997, "kl": 0.21875, "learning_rate": 1.3330589270998806e-06, "loss": 0.002219142857939005, "memory(GiB)": 146.12, "reward": 2.0311450958251953, "reward_std": 0.23200544714927673, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3149089217185974, "rewards/EvidenceHallucination/std": 0.3664127290248871, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.3278050422668457, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8348298072814941, "rewards/VideoAccuracy/std": 0.4276603162288666, "step": 398, "train_speed(iter/s)": 0.018334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/mean_length": 560.047607421875, "completions/min_length": 364.0, "entropy/max": 0.72265625, "entropy/mean": 0.400390625, "entropy/min": 0.26953125, "epoch": 0.399, "grad_norm": 1.1453943649734857, "kl": 0.275390625, "learning_rate": 1.3300651070846331e-06, "loss": 0.0027932848315685987, "memory(GiB)": 146.12, "reward": 1.3441669940948486, "reward_std": 0.3663594424724579, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.167385995388031, "rewards/EvidenceHallucination/std": 0.32827678322792053, "rewards/Evidence_Num_Record/mean": 5.88095235824585, "rewards/Evidence_Num_Record/std": 2.2760939598083496, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.27735647559165955, "rewards/VideoAccuracy/std": 0.43789809942245483, "step": 399, "train_speed(iter/s)": 0.018336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 606.1904907226562, "completions/min_length": 365.0, "entropy/max": 0.6875, "entropy/mean": 0.404296875, "entropy/min": 0.06396484375, "epoch": 0.4, "grad_norm": 0.9919679351042843, "kl": 0.2333984375, "learning_rate": 1.3270679633174217e-06, "loss": 0.0026555825024843216, "memory(GiB)": 146.12, "reward": 1.5256381034851074, "reward_std": 0.3579689860343933, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22879035770893097, "rewards/EvidenceHallucination/std": 0.37589502334594727, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 1.689105749130249, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.43702271580696106, "rewards/VideoAccuracy/std": 0.5390593409538269, "step": 400, "train_speed(iter/s)": 0.018312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 638.0, "completions/mean_length": 449.69049072265625, "completions/min_length": 320.0, "entropy/max": 0.4609375, "entropy/mean": 0.302734375, "entropy/min": 0.146484375, "epoch": 0.401, "grad_norm": 1.139445341067388, "kl": 0.2490234375, "learning_rate": 1.3240675259794504e-06, "loss": 0.0025015901774168015, "memory(GiB)": 146.12, "reward": 2.1623573303222656, "reward_std": 0.13159291446208954, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37520307302474976, "rewards/EvidenceHallucination/std": 0.44481533765792847, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.7624309062957764, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9285714626312256, "rewards/HonestTime/std": 0.26066118478775024, "rewards/VideoAccuracy/mean": 0.9016023278236389, "rewards/VideoAccuracy/std": 0.3952128291130066, "step": 401, "train_speed(iter/s)": 0.018272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 736.357177734375, "completions/min_length": 312.0, "entropy/max": 0.69921875, "entropy/mean": 0.404296875, "entropy/min": 0.107421875, "epoch": 0.402, "grad_norm": 1.2138014727028978, "kl": 0.2197265625, "learning_rate": 1.3210638252850906e-06, "loss": 0.0023418040946125984, "memory(GiB)": 146.12, "reward": 1.741194486618042, "reward_std": 0.3397737145423889, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4916861057281494, "rewards/EvidenceHallucination/std": 0.3802826404571533, "rewards/Evidence_Num_Record/mean": 7.428571701049805, "rewards/Evidence_Num_Record/std": 3.6702020168304443, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 402, "train_speed(iter/s)": 0.018248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 777.0, "completions/mean_length": 517.8809814453125, "completions/min_length": 367.0, "entropy/max": 0.57421875, "entropy/mean": 0.4296875, "entropy/min": 0.33203125, "epoch": 0.403, "grad_norm": 1.273083723325503, "kl": 0.279296875, "learning_rate": 1.318056891481575e-06, "loss": 0.0028203255496919155, "memory(GiB)": 146.12, "reward": 1.5516564846038818, "reward_std": 0.24027308821678162, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20131969451904297, "rewards/EvidenceHallucination/std": 0.36238354444503784, "rewards/Evidence_Num_Record/mean": 5.1666669845581055, "rewards/Evidence_Num_Record/std": 1.5913894176483154, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.4542495310306549, "rewards/VideoAccuracy/std": 0.3473469913005829, "step": 403, "train_speed(iter/s)": 0.018256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 553.1190795898438, "completions/min_length": 319.0, "entropy/max": 0.62109375, "entropy/mean": 0.345703125, "entropy/min": 0.0908203125, "epoch": 0.404, "grad_norm": 1.1971248641583463, "kl": 0.25, "learning_rate": 1.3150467548486928e-06, "loss": 0.0026676864363253117, "memory(GiB)": 146.12, "reward": 2.1506330966949463, "reward_std": 0.2771437466144562, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5300754904747009, "rewards/EvidenceHallucination/std": 0.39288297295570374, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.2308934926986694, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8898561000823975, "rewards/VideoAccuracy/std": 0.3947162926197052, "step": 404, "train_speed(iter/s)": 0.018233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 579.5238037109375, "completions/min_length": 295.0, "entropy/max": 1.25, "entropy/mean": 0.42578125, "entropy/min": 0.16015625, "epoch": 0.405, "grad_norm": 1.055141020743444, "kl": 0.2431640625, "learning_rate": 1.3120334456984869e-06, "loss": 0.0025412007234990597, "memory(GiB)": 146.12, "reward": 1.7340662479400635, "reward_std": 0.20672297477722168, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44430071115493774, "rewards/EvidenceHallucination/std": 0.4083597660064697, "rewards/Evidence_Num_Record/mean": 5.6666669845581055, "rewards/Evidence_Num_Record/std": 2.9686710834503174, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5904439687728882, "rewards/VideoAccuracy/std": 0.4767262041568756, "step": 405, "train_speed(iter/s)": 0.018211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 726.0, "completions/mean_length": 476.40478515625, "completions/min_length": 331.0, "entropy/max": 0.58203125, "entropy/mean": 0.435546875, "entropy/min": 0.31640625, "epoch": 0.406, "grad_norm": 1.2421606970845038, "kl": 0.302734375, "learning_rate": 1.3090169943749473e-06, "loss": 0.003044125158339739, "memory(GiB)": 146.12, "reward": 1.8416240215301514, "reward_std": 0.2010241448879242, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48798543214797974, "rewards/EvidenceHallucination/std": 0.421680212020874, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.21091628074646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.6535505652427673, "rewards/VideoAccuracy/std": 0.36351004242897034, "step": 406, "train_speed(iter/s)": 0.018217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 570.5952758789062, "completions/min_length": 327.0, "entropy/max": 0.6328125, "entropy/mean": 0.3984375, "entropy/min": 0.052978515625, "epoch": 0.407, "grad_norm": 1.032223184887646, "kl": 0.271484375, "learning_rate": 1.3059974312537052e-06, "loss": 0.002970391418784857, "memory(GiB)": 146.12, "reward": 1.5923843383789062, "reward_std": 0.30783599615097046, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3410428762435913, "rewards/EvidenceHallucination/std": 0.42950204014778137, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 0.9084070324897766, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.45274725556373596, "rewards/VideoAccuracy/std": 0.5236507058143616, "step": 407, "train_speed(iter/s)": 0.018193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 572.5952758789062, "completions/min_length": 336.0, "entropy/max": 0.9140625, "entropy/mean": 0.306640625, "entropy/min": 0.0849609375, "epoch": 0.408, "grad_norm": 1.0113628742819467, "kl": 0.2041015625, "learning_rate": 1.3029747867417273e-06, "loss": 0.002208232879638672, "memory(GiB)": 146.12, "reward": 1.915709376335144, "reward_std": 0.3583010137081146, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31953245401382446, "rewards/EvidenceHallucination/std": 0.42113471031188965, "rewards/Evidence_Num_Record/mean": 4.857142925262451, "rewards/Evidence_Num_Record/std": 3.9482717514038086, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.7470409274101257, "rewards/VideoAccuracy/std": 0.47045883536338806, "step": 408, "train_speed(iter/s)": 0.018173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 566.2857055664062, "completions/min_length": 308.0, "entropy/max": 2.515625, "entropy/mean": 0.50390625, "entropy/min": 0.0927734375, "epoch": 0.409, "grad_norm": 1.458716353547437, "kl": 0.28515625, "learning_rate": 1.2999490912770106e-06, "loss": 0.002989646978676319, "memory(GiB)": 146.12, "reward": 1.6135457754135132, "reward_std": 0.3748294711112976, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35370171070098877, "rewards/EvidenceHallucination/std": 0.43925637006759644, "rewards/Evidence_Num_Record/mean": 4.904761791229248, "rewards/Evidence_Num_Record/std": 1.4784554243087769, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5261387228965759, "rewards/VideoAccuracy/std": 0.4697481393814087, "step": 409, "train_speed(iter/s)": 0.018157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 568.952392578125, "completions/min_length": 349.0, "entropy/max": 0.73046875, "entropy/mean": 0.40625, "entropy/min": 0.10498046875, "epoch": 0.41, "grad_norm": 1.3565337752460667, "kl": 0.271484375, "learning_rate": 1.296920375328275e-06, "loss": 0.002834528684616089, "memory(GiB)": 146.8, "reward": 1.4560307264328003, "reward_std": 0.29307645559310913, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29645535349845886, "rewards/EvidenceHallucination/std": 0.4277758300304413, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.0548268556594849, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.35388249158859253, "rewards/VideoAccuracy/std": 0.3094964623451233, "step": 410, "train_speed(iter/s)": 0.018145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07142857142857142, "completions/max_length": 2625.0, "completions/mean_length": 630.7857055664062, "completions/min_length": 342.0, "entropy/max": 0.64453125, "entropy/mean": 0.2734375, "entropy/min": 0.0673828125, "epoch": 0.411, "grad_norm": 1.0384389324702676, "kl": 0.1962890625, "learning_rate": 1.293888669394656e-06, "loss": 0.0023590796627104282, "memory(GiB)": 147.17, "reward": 2.157050848007202, "reward_std": 0.44932615756988525, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.507347822189331, "rewards/EvidenceHallucination/std": 0.4063733220100403, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 0.8900012969970703, "rewards/Format/mean": 0.9285714626312256, "rewards/Format/std": 0.26066118478775024, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8912954926490784, "rewards/VideoAccuracy/std": 0.4708878993988037, "step": 411, "train_speed(iter/s)": 0.018115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09523809523809523, "completions/max_length": 2625.0, "completions/mean_length": 697.3333740234375, "completions/min_length": 243.0, "entropy/max": 2.59375, "entropy/mean": 0.447265625, "entropy/min": 0.09765625, "epoch": 0.412, "grad_norm": 0.9814914235030084, "kl": 0.2412109375, "learning_rate": 1.290854004005399e-06, "loss": 0.0028356886468827724, "memory(GiB)": 147.17, "reward": 1.6398093700408936, "reward_std": 0.3727138936519623, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46095216274261475, "rewards/EvidenceHallucination/std": 0.40103238821029663, "rewards/Evidence_Num_Record/mean": 4.857142925262451, "rewards/Evidence_Num_Record/std": 1.523337721824646, "rewards/Format/mean": 0.9047619104385376, "rewards/Format/std": 0.297101765871048, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679577350616455, "step": 412, "train_speed(iter/s)": 0.018094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 747.0, "completions/mean_length": 494.66668701171875, "completions/min_length": 297.0, "entropy/max": 0.58203125, "entropy/mean": 0.396484375, "entropy/min": 0.216796875, "epoch": 0.413, "grad_norm": 1.0223745329810805, "kl": 0.283203125, "learning_rate": 1.287816409719551e-06, "loss": 0.0028338914271444082, "memory(GiB)": 147.17, "reward": 1.2753490209579468, "reward_std": 0.13075286149978638, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11048247665166855, "rewards/EvidenceHallucination/std": 0.27473148703575134, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.0852190256118774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.19610968232154846, "rewards/VideoAccuracy/std": 0.2764948904514313, "step": 413, "train_speed(iter/s)": 0.018114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 503.71429443359375, "completions/min_length": 304.0, "entropy/max": 0.703125, "entropy/mean": 0.37890625, "entropy/min": 0.08837890625, "epoch": 0.414, "grad_norm": 1.09227655023033, "kl": 0.294921875, "learning_rate": 1.2847759171256522e-06, "loss": 0.0031100395135581493, "memory(GiB)": 147.17, "reward": 2.3160693645477295, "reward_std": 0.24112042784690857, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6798633337020874, "rewards/EvidenceHallucination/std": 0.35106703639030457, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.8379085063934326, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 1.0253347158432007, "rewards/VideoAccuracy/std": 0.35029590129852295, "step": 414, "train_speed(iter/s)": 0.018096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 536.547607421875, "completions/min_length": 238.0, "entropy/max": 1.609375, "entropy/mean": 0.4453125, "entropy/min": 0.1435546875, "epoch": 0.415, "grad_norm": 1.0731366322658153, "kl": 0.255859375, "learning_rate": 1.2817325568414297e-06, "loss": 0.002732915338128805, "memory(GiB)": 147.17, "reward": 1.9930468797683716, "reward_std": 0.25008612871170044, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48213377594947815, "rewards/EvidenceHallucination/std": 0.43128907680511475, "rewards/Evidence_Num_Record/mean": 4.88095235824585, "rewards/Evidence_Num_Record/std": 2.1775169372558594, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.8466201424598694, "rewards/VideoAccuracy/std": 0.3688356876373291, "step": 415, "train_speed(iter/s)": 0.018086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 700.0, "completions/mean_length": 436.0238037109375, "completions/min_length": 319.0, "entropy/max": 0.494140625, "entropy/mean": 0.384765625, "entropy/min": 0.236328125, "epoch": 0.416, "grad_norm": 1.42174973703141, "kl": 0.30859375, "learning_rate": 1.2786863595134878e-06, "loss": 0.0030953167006373405, "memory(GiB)": 147.17, "reward": 1.2248696088790894, "reward_std": 0.30835697054862976, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.10754480957984924, "rewards/EvidenceHallucination/std": 0.2973156273365021, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 1.0473682880401611, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2142857164144516, "rewards/HonestTime/std": 0.4152997136116028, "rewards/VideoAccuracy/mean": 0.16050346195697784, "rewards/VideoAccuracy/std": 0.30264562368392944, "step": 416, "train_speed(iter/s)": 0.018087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 620.0, "completions/mean_length": 423.3571472167969, "completions/min_length": 314.0, "entropy/max": 0.5234375, "entropy/mean": 0.384765625, "entropy/min": 0.259765625, "epoch": 0.417, "grad_norm": 1.346342598424452, "kl": 0.345703125, "learning_rate": 1.275637355816999e-06, "loss": 0.003472857875749469, "memory(GiB)": 147.17, "reward": 1.6389472484588623, "reward_std": 0.2763007581233978, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32168424129486084, "rewards/EvidenceHallucination/std": 0.4292060136795044, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.686691164970398, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.4793723523616791, "rewards/VideoAccuracy/std": 0.4684654474258423, "step": 417, "train_speed(iter/s)": 0.018089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 601.452392578125, "completions/min_length": 278.0, "entropy/max": 1.5, "entropy/mean": 0.291015625, "entropy/min": 0.103515625, "epoch": 0.418, "grad_norm": 1.0149165496464008, "kl": 0.21484375, "learning_rate": 1.2725855764553978e-06, "loss": 0.0024170055985450745, "memory(GiB)": 147.17, "reward": 2.131437063217163, "reward_std": 0.29418548941612244, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3117740750312805, "rewards/EvidenceHallucination/std": 0.4131038188934326, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 1.9959309101104736, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.9643202424049377, "rewards/VideoAccuracy/std": 0.3569818437099457, "step": 418, "train_speed(iter/s)": 0.018071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 708.0, "completions/mean_length": 457.3095397949219, "completions/min_length": 312.0, "entropy/max": 1.203125, "entropy/mean": 0.439453125, "entropy/min": 0.27734375, "epoch": 0.419, "grad_norm": 1.4330677765387068, "kl": 0.328125, "learning_rate": 1.269531052160068e-06, "loss": 0.003306722268462181, "memory(GiB)": 147.17, "reward": 1.9174654483795166, "reward_std": 0.1975611299276352, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5992318391799927, "rewards/EvidenceHallucination/std": 0.35757526755332947, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.1305595636367798, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.764285683631897, "rewards/VideoAccuracy/std": 0.38623979687690735, "step": 419, "train_speed(iter/s)": 0.01806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/mean_length": 423.5952453613281, "completions/min_length": 262.0, "entropy/max": 0.6015625, "entropy/mean": 0.37890625, "entropy/min": 0.1484375, "epoch": 0.42, "grad_norm": 1.3890351791493056, "kl": 0.326171875, "learning_rate": 1.2664738136900348e-06, "loss": 0.003325998317450285, "memory(GiB)": 147.17, "reward": 1.5204228162765503, "reward_std": 0.22484032809734344, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2736830413341522, "rewards/EvidenceHallucination/std": 0.41822507977485657, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.66999751329422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.39901959896087646, "rewards/VideoAccuracy/std": 0.4555363059043884, "step": 420, "train_speed(iter/s)": 0.018069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/mean_length": 464.19049072265625, "completions/min_length": 290.0, "entropy/max": 0.58203125, "entropy/mean": 0.28515625, "entropy/min": 0.1318359375, "epoch": 0.421, "grad_norm": 1.059510007020288, "kl": 0.267578125, "learning_rate": 1.2634138918316565e-06, "loss": 0.0027240943163633347, "memory(GiB)": 147.17, "reward": 2.171023368835449, "reward_std": 0.1849653422832489, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46852439641952515, "rewards/EvidenceHallucination/std": 0.40686750411987305, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.7265497446060181, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8820803761482239, "rewards/VideoAccuracy/std": 0.4453311562538147, "step": 421, "train_speed(iter/s)": 0.018071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/mean_length": 505.0714416503906, "completions/min_length": 286.0, "entropy/max": 1.2421875, "entropy/mean": 0.447265625, "entropy/min": 0.1787109375, "epoch": 0.422, "grad_norm": 1.0522285569899024, "kl": 0.328125, "learning_rate": 1.260351317398312e-06, "loss": 0.0034251343458890915, "memory(GiB)": 147.17, "reward": 1.7768745422363281, "reward_std": 0.014493129216134548, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5510392189025879, "rewards/EvidenceHallucination/std": 0.4110569953918457, "rewards/Evidence_Num_Record/mean": 4.952381134033203, "rewards/Evidence_Num_Record/std": 2.0115809440612793, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 422, "train_speed(iter/s)": 0.018061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 489.3095397949219, "completions/min_length": 296.0, "entropy/max": 0.68359375, "entropy/mean": 0.400390625, "entropy/min": 0.095703125, "epoch": 0.423, "grad_norm": 1.1106053463889534, "kl": 0.296875, "learning_rate": 1.2572861212300916e-06, "loss": 0.0031044986099004745, "memory(GiB)": 147.17, "reward": 1.2903608083724976, "reward_std": 0.28117311000823975, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13605429232120514, "rewards/EvidenceHallucination/std": 0.3380788266658783, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 1.0040568113327026, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.20838797092437744, "rewards/VideoAccuracy/std": 0.30370450019836426, "step": 423, "train_speed(iter/s)": 0.018054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07142857142857142, "completions/max_length": 2625.0, "completions/mean_length": 630.0952758789062, "completions/min_length": 291.0, "entropy/max": 0.81640625, "entropy/mean": 0.353515625, "entropy/min": 0.08447265625, "epoch": 0.424, "grad_norm": 0.9851445551763302, "kl": 0.2255859375, "learning_rate": 1.2542183341934871e-06, "loss": 0.002961507998406887, "memory(GiB)": 147.17, "reward": 2.1810081005096436, "reward_std": 0.5009894371032715, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7007943987846375, "rewards/EvidenceHallucination/std": 0.3857671320438385, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.7265497446060181, "rewards/Format/mean": 0.9285714626312256, "rewards/Format/std": 0.26066118478775024, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9098968505859375, "rewards/VideoAccuracy/std": 0.4694446921348572, "step": 424, "train_speed(iter/s)": 0.018035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1064.0, "completions/mean_length": 563.0, "completions/min_length": 316.0, "entropy/max": 0.7734375, "entropy/mean": 0.36328125, "entropy/min": 0.1357421875, "epoch": 0.425, "grad_norm": 0.8763542519990872, "kl": 0.2578125, "learning_rate": 1.251147987181079e-06, "loss": 0.002668556524440646, "memory(GiB)": 147.17, "reward": 1.6808499097824097, "reward_std": 0.125533327460289, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11130917072296143, "rewards/EvidenceHallucination/std": 0.2784552574157715, "rewards/Evidence_Num_Record/mean": 5.357142925262451, "rewards/Evidence_Num_Record/std": 2.0579581260681152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.591921329498291, "rewards/VideoAccuracy/std": 0.582549512386322, "step": 425, "train_speed(iter/s)": 0.01804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 481.8571472167969, "completions/min_length": 252.0, "entropy/max": 0.6875, "entropy/mean": 0.361328125, "entropy/min": 0.103515625, "epoch": 0.426, "grad_norm": 0.9850528322219039, "kl": 0.310546875, "learning_rate": 1.248075111111229e-06, "loss": 0.0032261803280562162, "memory(GiB)": 147.17, "reward": 1.2789486646652222, "reward_std": 0.1393444687128067, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.11669065803289413, "rewards/EvidenceHallucination/std": 0.25353145599365234, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 1.630323886871338, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.2056104838848114, "rewards/VideoAccuracy/std": 0.28642597794532776, "step": 426, "train_speed(iter/s)": 0.018032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/mean_length": 395.3571472167969, "completions/min_length": 307.0, "entropy/max": 0.5703125, "entropy/mean": 0.3828125, "entropy/min": 0.2314453125, "epoch": 0.427, "grad_norm": 1.00042389463066, "kl": 0.365234375, "learning_rate": 1.244999736927764e-06, "loss": 0.00365253328345716, "memory(GiB)": 147.17, "reward": 1.7335829734802246, "reward_std": 0.2119014859199524, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39727094769477844, "rewards/EvidenceHallucination/std": 0.4464435875415802, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.7071067690849304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.554128885269165, "rewards/VideoAccuracy/std": 0.606694757938385, "step": 427, "train_speed(iter/s)": 0.018038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1055.0, "completions/mean_length": 579.2857055664062, "completions/min_length": 309.0, "entropy/max": 0.734375, "entropy/mean": 0.31640625, "entropy/min": 0.1328125, "epoch": 0.428, "grad_norm": 1.0621340992825263, "kl": 0.2294921875, "learning_rate": 1.2419218955996676e-06, "loss": 0.002335094381123781, "memory(GiB)": 147.17, "reward": 1.875522494316101, "reward_std": 0.24692384898662567, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34564051032066345, "rewards/EvidenceHallucination/std": 0.44944071769714355, "rewards/Evidence_Num_Record/mean": 5.023809432983398, "rewards/Evidence_Num_Record/std": 1.8933528661727905, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074110031128, "rewards/VideoAccuracy/mean": 0.6825847625732422, "rewards/VideoAccuracy/std": 0.5046215057373047, "step": 428, "train_speed(iter/s)": 0.018032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1528.0, "completions/mean_length": 497.3333435058594, "completions/min_length": 324.0, "entropy/max": 0.63671875, "entropy/mean": 0.375, "entropy/min": 0.1416015625, "epoch": 0.429, "grad_norm": 1.2591435950206409, "kl": 0.326171875, "learning_rate": 1.2388416181207688e-06, "loss": 0.0034011879470199347, "memory(GiB)": 147.17, "reward": 1.6917601823806763, "reward_std": 0.2739050090312958, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49674439430236816, "rewards/EvidenceHallucination/std": 0.4614964425563812, "rewards/Evidence_Num_Record/mean": 4.5, "rewards/Evidence_Num_Record/std": 1.941209316253662, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5638399124145508, "rewards/VideoAccuracy/std": 0.4786205291748047, "step": 429, "train_speed(iter/s)": 0.018023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 617.0, "completions/mean_length": 443.8095397949219, "completions/min_length": 341.0, "entropy/max": 0.64453125, "entropy/mean": 0.419921875, "entropy/min": 0.2890625, "epoch": 0.43, "grad_norm": 1.1075464620990976, "kl": 0.328125, "learning_rate": 1.2357589355094273e-06, "loss": 0.003294752910733223, "memory(GiB)": 147.17, "reward": 1.500035285949707, "reward_std": 0.24608975648880005, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31390058994293213, "rewards/EvidenceHallucination/std": 0.4289908707141876, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7083376049995422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.3705884516239166, "rewards/VideoAccuracy/std": 0.47497791051864624, "step": 430, "train_speed(iter/s)": 0.018028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/mean_length": 422.5238037109375, "completions/min_length": 269.0, "entropy/max": 0.578125, "entropy/mean": 0.291015625, "entropy/min": 0.1357421875, "epoch": 0.431, "grad_norm": 1.1916147615013701, "kl": 0.279296875, "learning_rate": 1.2326738788082223e-06, "loss": 0.002827655989676714, "memory(GiB)": 147.17, "reward": 2.2290050983428955, "reward_std": 0.156645268201828, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44680774211883545, "rewards/EvidenceHallucination/std": 0.43866533041000366, "rewards/Evidence_Num_Record/mean": 3.404762029647827, "rewards/Evidence_Num_Record/std": 0.6270147562026978, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9396434426307678, "rewards/VideoAccuracy/std": 0.4963000416755676, "step": 431, "train_speed(iter/s)": 0.018031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 555.2380981445312, "completions/min_length": 287.0, "entropy/max": 3.125, "entropy/mean": 0.56640625, "entropy/min": 0.10498046875, "epoch": 0.432, "grad_norm": 1.065170591762536, "kl": 0.306640625, "learning_rate": 1.229586479083641e-06, "loss": 0.0032462298404425383, "memory(GiB)": 147.17, "reward": 1.8200960159301758, "reward_std": 0.2258869856595993, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5885754823684692, "rewards/EvidenceHallucination/std": 0.4088142514228821, "rewards/Evidence_Num_Record/mean": 5.023809432983398, "rewards/Evidence_Num_Record/std": 1.7177424430847168, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7142857313156128, "rewards/VideoAccuracy/std": 0.45722997188568115, "step": 432, "train_speed(iter/s)": 0.018013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 889.0, "completions/mean_length": 453.1428527832031, "completions/min_length": 283.0, "entropy/max": 0.76171875, "entropy/mean": 0.39453125, "entropy/min": 0.1923828125, "epoch": 0.433, "grad_norm": 1.3709639169709829, "kl": 0.341796875, "learning_rate": 1.2264967674257646e-06, "loss": 0.0034402552992105484, "memory(GiB)": 147.17, "reward": 1.442723035812378, "reward_std": 0.2591922879219055, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23618589341640472, "rewards/EvidenceHallucination/std": 0.4067757725715637, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.6973367929458618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.3478667140007019, "rewards/VideoAccuracy/std": 0.3904357850551605, "step": 433, "train_speed(iter/s)": 0.018016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 640.0, "completions/mean_length": 469.4761962890625, "completions/min_length": 336.0, "entropy/max": 0.5703125, "entropy/mean": 0.3359375, "entropy/min": 0.1181640625, "epoch": 0.434, "grad_norm": 1.071202993072842, "kl": 0.3046875, "learning_rate": 1.2234047749479541e-06, "loss": 0.0030643518548458815, "memory(GiB)": 147.17, "reward": 1.7931246757507324, "reward_std": 0.24046742916107178, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3707311451435089, "rewards/EvidenceHallucination/std": 0.4582987427711487, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.7589956521987915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5523116588592529, "rewards/VideoAccuracy/std": 0.5277762413024902, "step": 434, "train_speed(iter/s)": 0.018034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 846.0, "completions/mean_length": 478.23809814453125, "completions/min_length": 323.0, "entropy/max": 0.703125, "entropy/mean": 0.35546875, "entropy/min": 0.1494140625, "epoch": 0.435, "grad_norm": 0.9180245436458784, "kl": 0.27734375, "learning_rate": 1.2203105327865407e-06, "loss": 0.002856798470020294, "memory(GiB)": 147.17, "reward": 1.877048373222351, "reward_std": 0.038632702082395554, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5482261180877686, "rewards/EvidenceHallucination/std": 0.4071207344532013, "rewards/Evidence_Num_Record/mean": 4.761904716491699, "rewards/Evidence_Num_Record/std": 1.7080801725387573, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7007363438606262, "rewards/VideoAccuracy/std": 0.5069580674171448, "step": 435, "train_speed(iter/s)": 0.018036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1574.0, "completions/mean_length": 499.16668701171875, "completions/min_length": 300.0, "entropy/max": 0.51171875, "entropy/mean": 0.353515625, "entropy/min": 0.1484375, "epoch": 0.436, "grad_norm": 1.3879530713684283, "kl": 0.314453125, "learning_rate": 1.2172140721005079e-06, "loss": 0.0032035568729043007, "memory(GiB)": 147.17, "reward": 1.9349544048309326, "reward_std": 0.13429507613182068, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6770479083061218, "rewards/EvidenceHallucination/std": 0.394752562046051, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.3104157447814941, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2142857164144516, "rewards/HonestTime/std": 0.4152997136116028, "rewards/VideoAccuracy/mean": 0.7566876411437988, "rewards/VideoAccuracy/std": 0.3377890884876251, "step": 436, "train_speed(iter/s)": 0.018023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 660.0, "completions/mean_length": 420.69049072265625, "completions/min_length": 230.0, "entropy/max": 0.49609375, "entropy/mean": 0.375, "entropy/min": 0.224609375, "epoch": 0.437, "grad_norm": 1.0397588887069589, "kl": 0.341796875, "learning_rate": 1.2141154240711804e-06, "loss": 0.003630727296695113, "memory(GiB)": 147.17, "reward": 1.5739099979400635, "reward_std": 0.12505431473255157, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3220664858818054, "rewards/EvidenceHallucination/std": 0.44022148847579956, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.8890219330787659, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.4094966650009155, "rewards/VideoAccuracy/std": 0.5382682681083679, "step": 437, "train_speed(iter/s)": 0.018042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/mean_length": 506.9761962890625, "completions/min_length": 293.0, "entropy/max": 1.828125, "entropy/mean": 0.361328125, "entropy/min": 0.1005859375, "epoch": 0.438, "grad_norm": 1.1826071787259753, "kl": 0.24609375, "learning_rate": 1.2110146199019098e-06, "loss": 0.002538088709115982, "memory(GiB)": 147.17, "reward": 1.7834502458572388, "reward_std": 0.3092948794364929, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3220008611679077, "rewards/EvidenceHallucination/std": 0.38896769285202026, "rewards/Evidence_Num_Record/mean": 4.547619342803955, "rewards/Evidence_Num_Record/std": 2.724794626235962, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.585716724395752, "rewards/VideoAccuracy/std": 0.4448087513446808, "step": 438, "train_speed(iter/s)": 0.01803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 762.0, "completions/mean_length": 496.0238037109375, "completions/min_length": 328.0, "entropy/max": 1.078125, "entropy/mean": 0.4453125, "entropy/min": 0.23828125, "epoch": 0.439, "grad_norm": 1.2076013926959337, "kl": 0.298828125, "learning_rate": 1.207911690817759e-06, "loss": 0.0030236421152949333, "memory(GiB)": 147.17, "reward": 1.730692744255066, "reward_std": 0.30598652362823486, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49944278597831726, "rewards/EvidenceHallucination/std": 0.43106022477149963, "rewards/Evidence_Num_Record/mean": 4.952381134033203, "rewards/Evidence_Num_Record/std": 1.360637903213501, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.60699462890625, "rewards/VideoAccuracy/std": 0.45530325174331665, "step": 439, "train_speed(iter/s)": 0.018035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/mean_length": 409.3571472167969, "completions/min_length": 275.0, "entropy/max": 0.52734375, "entropy/mean": 0.3984375, "entropy/min": 0.228515625, "epoch": 0.44, "grad_norm": 1.3589092248962515, "kl": 0.3515625, "learning_rate": 1.2048066680651908e-06, "loss": 0.003545670537278056, "memory(GiB)": 147.17, "reward": 1.5632761716842651, "reward_std": 0.2434949278831482, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2878570556640625, "rewards/EvidenceHallucination/std": 0.43739238381385803, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.9891983866691589, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.4485619068145752, "rewards/VideoAccuracy/std": 0.5861182808876038, "step": 440, "train_speed(iter/s)": 0.018045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 598.0, "completions/mean_length": 432.8095397949219, "completions/min_length": 351.0, "entropy/max": 0.546875, "entropy/mean": 0.291015625, "entropy/min": 0.142578125, "epoch": 0.441, "grad_norm": 0.9782030369969055, "kl": 0.279296875, "learning_rate": 1.2016995829117486e-06, "loss": 0.002998619107529521, "memory(GiB)": 147.17, "reward": 2.205361843109131, "reward_std": 0.05849936604499817, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6928436160087585, "rewards/EvidenceHallucination/std": 0.3390180766582489, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8667927980422974, "rewards/VideoAccuracy/std": 0.43811073899269104, "step": 441, "train_speed(iter/s)": 0.01805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/mean_length": 509.5, "completions/min_length": 319.0, "entropy/max": 1.9140625, "entropy/mean": 0.4453125, "entropy/min": 0.193359375, "epoch": 0.442, "grad_norm": 1.1929971126976804, "kl": 0.296875, "learning_rate": 1.1985904666457453e-06, "loss": 0.0030174236744642258, "memory(GiB)": 147.17, "reward": 1.6967631578445435, "reward_std": 0.2626940608024597, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5076243877410889, "rewards/EvidenceHallucination/std": 0.4318709969520569, "rewards/Evidence_Num_Record/mean": 5.428571701049805, "rewards/Evidence_Num_Record/std": 2.670003890991211, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679577350616455, "step": 442, "train_speed(iter/s)": 0.018051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 724.0, "completions/mean_length": 436.90478515625, "completions/min_length": 263.0, "entropy/max": 0.55078125, "entropy/mean": 0.419921875, "entropy/min": 0.29296875, "epoch": 0.443, "grad_norm": 1.5673902316475388, "kl": 0.322265625, "learning_rate": 1.1954793505759482e-06, "loss": 0.003226308850571513, "memory(GiB)": 147.17, "reward": 1.6576652526855469, "reward_std": 0.3279918432235718, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.327426940202713, "rewards/EvidenceHallucination/std": 0.4299605190753937, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.3716899156570435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.5350369811058044, "rewards/VideoAccuracy/std": 0.4003976285457611, "step": 443, "train_speed(iter/s)": 0.018056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 800.0, "completions/mean_length": 432.452392578125, "completions/min_length": 287.0, "entropy/max": 0.50390625, "entropy/mean": 0.322265625, "entropy/min": 0.1298828125, "epoch": 0.444, "grad_norm": 1.290247829605721, "kl": 0.330078125, "learning_rate": 1.192366266031261e-06, "loss": 0.003352985717356205, "memory(GiB)": 147.17, "reward": 2.2507922649383545, "reward_std": 0.25693458318710327, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6864717602729797, "rewards/EvidenceHallucination/std": 0.3530677258968353, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 1.0714867115020752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.946831226348877, "rewards/VideoAccuracy/std": 0.47743329405784607, "step": 444, "train_speed(iter/s)": 0.018058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1707.0, "completions/mean_length": 541.0238037109375, "completions/min_length": 275.0, "entropy/max": 0.703125, "entropy/mean": 0.33984375, "entropy/min": 0.1298828125, "epoch": 0.445, "grad_norm": 1.3235877313949866, "kl": 0.24609375, "learning_rate": 1.1892512443604101e-06, "loss": 0.0026101216208189726, "memory(GiB)": 147.17, "reward": 1.735228419303894, "reward_std": 0.3607367277145386, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42023664712905884, "rewards/EvidenceHallucination/std": 0.3911043405532837, "rewards/Evidence_Num_Record/mean": 5.476190567016602, "rewards/Evidence_Num_Record/std": 3.535451650619507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5845143795013428, "rewards/VideoAccuracy/std": 0.5250858068466187, "step": 445, "train_speed(iter/s)": 0.018051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 800.0, "completions/mean_length": 471.6190490722656, "completions/min_length": 334.0, "entropy/max": 0.51953125, "entropy/mean": 0.37109375, "entropy/min": 0.23828125, "epoch": 0.446, "grad_norm": 1.009163131938503, "kl": 0.328125, "learning_rate": 1.18613431693163e-06, "loss": 0.0032964288257062435, "memory(GiB)": 147.17, "reward": 1.609557032585144, "reward_std": 0.15845243632793427, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3692663609981537, "rewards/EvidenceHallucination/std": 0.4354756474494934, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.4152398109436035, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4690369665622711, "rewards/VideoAccuracy/std": 0.4111632704734802, "step": 446, "train_speed(iter/s)": 0.018053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 794.0, "completions/mean_length": 459.23809814453125, "completions/min_length": 288.0, "entropy/max": 0.51953125, "entropy/mean": 0.3828125, "entropy/min": 0.2275390625, "epoch": 0.447, "grad_norm": 1.1351614156474819, "kl": 0.333984375, "learning_rate": 1.1830155151323444e-06, "loss": 0.0033620535396039486, "memory(GiB)": 147.17, "reward": 1.8145294189453125, "reward_std": 0.2776869535446167, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4702847898006439, "rewards/EvidenceHallucination/std": 0.42905890941619873, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.3710546493530273, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6204724311828613, "rewards/VideoAccuracy/std": 0.5245855450630188, "step": 447, "train_speed(iter/s)": 0.018059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 809.0, "completions/mean_length": 505.0, "completions/min_length": 309.0, "entropy/max": 0.67578125, "entropy/mean": 0.310546875, "entropy/min": 0.158203125, "epoch": 0.448, "grad_norm": 1.031082296659848, "kl": 0.26953125, "learning_rate": 1.1798948703688538e-06, "loss": 0.0027520672883838415, "memory(GiB)": 147.17, "reward": 1.888918399810791, "reward_std": 0.21466487646102905, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4228678345680237, "rewards/EvidenceHallucination/std": 0.4388137459754944, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.2505515813827515, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6710115075111389, "rewards/VideoAccuracy/std": 0.5001021027565002, "step": 448, "train_speed(iter/s)": 0.01806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 990.0, "completions/mean_length": 478.6428527832031, "completions/min_length": 303.0, "entropy/max": 0.91015625, "entropy/mean": 0.41796875, "entropy/min": 0.275390625, "epoch": 0.449, "grad_norm": 1.2527013531866111, "kl": 0.314453125, "learning_rate": 1.1767724140660156e-06, "loss": 0.003172614611685276, "memory(GiB)": 147.17, "reward": 1.6928980350494385, "reward_std": 0.21076981723308563, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5474359393119812, "rewards/EvidenceHallucination/std": 0.46206578612327576, "rewards/Evidence_Num_Record/mean": 4.642857074737549, "rewards/Evidence_Num_Record/std": 1.9978210926055908, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5500772595405579, "rewards/VideoAccuracy/std": 0.46221691370010376, "step": 449, "train_speed(iter/s)": 0.018056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 646.0, "completions/mean_length": 418.5, "completions/min_length": 287.0, "entropy/max": 0.68359375, "entropy/mean": 0.376953125, "entropy/min": 0.2177734375, "epoch": 0.45, "grad_norm": 1.3902251118272175, "kl": 0.341796875, "learning_rate": 1.1736481776669305e-06, "loss": 0.0034268698655068874, "memory(GiB)": 147.17, "reward": 1.7026755809783936, "reward_std": 0.2461749017238617, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4613755941390991, "rewards/EvidenceHallucination/std": 0.45595988631248474, "rewards/Evidence_Num_Record/mean": 3.5, "rewards/Evidence_Num_Record/std": 0.9173131585121155, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5484958291053772, "rewards/VideoAccuracy/std": 0.4273762106895447, "step": 450, "train_speed(iter/s)": 0.018064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 826.0, "completions/mean_length": 518.6666870117188, "completions/min_length": 282.0, "entropy/max": 0.51171875, "entropy/mean": 0.29296875, "entropy/min": 0.1591796875, "epoch": 0.451, "grad_norm": 1.1511943444979413, "kl": 0.279296875, "learning_rate": 1.1705221926326238e-06, "loss": 0.0028034679125994444, "memory(GiB)": 147.17, "reward": 2.190749168395996, "reward_std": 0.07624303549528122, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2890765070915222, "rewards/EvidenceHallucination/std": 0.41826358437538147, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.9830148816108704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9329338073730469, "rewards/VideoAccuracy/std": 0.334219753742218, "step": 451, "train_speed(iter/s)": 0.018053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 887.0, "completions/mean_length": 494.4761962890625, "completions/min_length": 350.0, "entropy/max": 1.375, "entropy/mean": 0.453125, "entropy/min": 0.1962890625, "epoch": 0.452, "grad_norm": 1.2027789028409457, "kl": 0.3359375, "learning_rate": 1.1673944904417308e-06, "loss": 0.0033881841227412224, "memory(GiB)": 147.17, "reward": 1.7803906202316284, "reward_std": 0.3277139365673065, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5686198472976685, "rewards/EvidenceHallucination/std": 0.4226139485836029, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 1.7897632122039795, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 452, "train_speed(iter/s)": 0.018028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 871.0, "completions/mean_length": 516.5714111328125, "completions/min_length": 319.0, "entropy/max": 0.54296875, "entropy/mean": 0.416015625, "entropy/min": 0.2412109375, "epoch": 0.453, "grad_norm": 1.214475764928054, "kl": 0.2890625, "learning_rate": 1.164265102590177e-06, "loss": 0.0029415320605039597, "memory(GiB)": 147.17, "reward": 1.564728856086731, "reward_std": 0.20751957595348358, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28774741291999817, "rewards/EvidenceHallucination/std": 0.40104275941848755, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 1.8804266452789307, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.4595602750778198, "rewards/VideoAccuracy/std": 0.4347084164619446, "step": 453, "train_speed(iter/s)": 0.018007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 758.0, "completions/mean_length": 508.8571472167969, "completions/min_length": 326.0, "entropy/max": 0.4765625, "entropy/mean": 0.333984375, "entropy/min": 0.142578125, "epoch": 0.454, "grad_norm": 1.1316807884478712, "kl": 0.310546875, "learning_rate": 1.1611340605908642e-06, "loss": 0.0031346462201327085, "memory(GiB)": 147.17, "reward": 1.9818377494812012, "reward_std": 0.237799733877182, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3703981935977936, "rewards/EvidenceHallucination/std": 0.4406714141368866, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.2623374462127686, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7410913109779358, "rewards/VideoAccuracy/std": 0.5927178859710693, "step": 454, "train_speed(iter/s)": 0.018021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1290.0, "completions/mean_length": 591.047607421875, "completions/min_length": 355.0, "entropy/max": 1.484375, "entropy/mean": 0.357421875, "entropy/min": 0.17578125, "epoch": 0.455, "grad_norm": 1.1066951817944102, "kl": 0.26953125, "learning_rate": 1.15800139597335e-06, "loss": 0.0028007570654153824, "memory(GiB)": 147.17, "reward": 1.9226216077804565, "reward_std": 0.21123027801513672, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44682320952415466, "rewards/EvidenceHallucination/std": 0.4188815951347351, "rewards/Evidence_Num_Record/mean": 5.285714149475098, "rewards/Evidence_Num_Record/std": 2.178450107574463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.766590416431427, "rewards/VideoAccuracy/std": 0.49839770793914795, "step": 455, "train_speed(iter/s)": 0.01802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 726.0, "completions/mean_length": 485.9761962890625, "completions/min_length": 334.0, "entropy/max": 0.65234375, "entropy/mean": 0.4140625, "entropy/min": 0.267578125, "epoch": 0.456, "grad_norm": 1.1556889569386688, "kl": 0.3125, "learning_rate": 1.1548671402835324e-06, "loss": 0.0031380036380141973, "memory(GiB)": 147.17, "reward": 1.6291717290878296, "reward_std": 0.16052106022834778, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29264798760414124, "rewards/EvidenceHallucination/std": 0.41749146580696106, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.4183138608932495, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008703470230103, "rewards/VideoAccuracy/mean": 0.484927773475647, "rewards/VideoAccuracy/std": 0.39783400297164917, "step": 456, "train_speed(iter/s)": 0.018014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 695.0, "completions/mean_length": 453.26190185546875, "completions/min_length": 331.0, "entropy/max": 0.53125, "entropy/mean": 0.388671875, "entropy/min": 0.2001953125, "epoch": 0.457, "grad_norm": 1.3107310194352497, "kl": 0.341796875, "learning_rate": 1.1517313250833317e-06, "loss": 0.0034421063028275967, "memory(GiB)": 147.17, "reward": 2.007448196411133, "reward_std": 0.0665355920791626, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.605268120765686, "rewards/EvidenceHallucination/std": 0.441053181886673, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.1168646812438965, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.786394476890564, "rewards/VideoAccuracy/std": 0.4613673686981201, "step": 457, "train_speed(iter/s)": 0.018015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/mean_length": 583.40478515625, "completions/min_length": 285.0, "entropy/max": 0.6875, "entropy/mean": 0.25390625, "entropy/min": 0.0849609375, "epoch": 0.458, "grad_norm": 0.9236477934543165, "kl": 0.2373046875, "learning_rate": 1.1485939819503716e-06, "loss": 0.002488694153726101, "memory(GiB)": 147.17, "reward": 2.173037052154541, "reward_std": 0.20351508259773254, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5917856693267822, "rewards/EvidenceHallucination/std": 0.3949899673461914, "rewards/Evidence_Num_Record/mean": 5.357142925262451, "rewards/Evidence_Num_Record/std": 2.895087718963623, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9213466048240662, "rewards/VideoAccuracy/std": 0.3844154477119446, "step": 458, "train_speed(iter/s)": 0.018013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1602.0, "completions/mean_length": 608.6666870117188, "completions/min_length": 386.0, "entropy/max": 0.90625, "entropy/mean": 0.3828125, "entropy/min": 0.22265625, "epoch": 0.459, "grad_norm": 1.0766498829438478, "kl": 0.279296875, "learning_rate": 1.1454551424776635e-06, "loss": 0.0028959258925169706, "memory(GiB)": 147.17, "reward": 1.6666834354400635, "reward_std": 0.13521941006183624, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4474688172340393, "rewards/EvidenceHallucination/std": 0.4277777075767517, "rewards/Evidence_Num_Record/mean": 6.5714287757873535, "rewards/Evidence_Num_Record/std": 3.82637095451355, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5486180782318115, "rewards/VideoAccuracy/std": 0.46456602215766907, "step": 459, "train_speed(iter/s)": 0.018008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 706.0, "completions/mean_length": 464.4761962890625, "completions/min_length": 278.0, "entropy/max": 0.5703125, "entropy/mean": 0.3828125, "entropy/min": 0.228515625, "epoch": 0.46, "grad_norm": 1.1448579776748715, "kl": 0.326171875, "learning_rate": 1.1423148382732853e-06, "loss": 0.0032780009787529707, "memory(GiB)": 147.17, "reward": 1.3398847579956055, "reward_std": 0.21157526969909668, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1676062047481537, "rewards/EvidenceHallucination/std": 0.3510550558567047, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 1.33999502658844, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500061869621277, "rewards/VideoAccuracy/mean": 0.2539824843406677, "rewards/VideoAccuracy/std": 0.35338884592056274, "step": 460, "train_speed(iter/s)": 0.018025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 585.0, "completions/mean_length": 452.0, "completions/min_length": 341.0, "entropy/max": 0.45703125, "entropy/mean": 0.328125, "entropy/min": 0.130859375, "epoch": 0.461, "grad_norm": 1.1064787379603562, "kl": 0.294921875, "learning_rate": 1.1391731009600653e-06, "loss": 0.0029807686805725098, "memory(GiB)": 147.17, "reward": 2.0820388793945312, "reward_std": 0.13201361894607544, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4889209568500519, "rewards/EvidenceHallucination/std": 0.43544960021972656, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.8164965510368347, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.7937784790992737, "rewards/VideoAccuracy/std": 0.4980928599834442, "step": 461, "train_speed(iter/s)": 0.018026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 737.2619018554688, "completions/min_length": 353.0, "entropy/max": 0.71484375, "entropy/mean": 0.298828125, "entropy/min": 0.0703125, "epoch": 0.462, "grad_norm": 0.9776750470192334, "kl": 0.2373046875, "learning_rate": 1.1360299621752643e-06, "loss": 0.0026709954254329205, "memory(GiB)": 147.17, "reward": 1.6693943738937378, "reward_std": 0.11008346080780029, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4303053915500641, "rewards/EvidenceHallucination/std": 0.38703253865242004, "rewards/Evidence_Num_Record/mean": 6.523809432983398, "rewards/Evidence_Num_Record/std": 4.379765510559082, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5952380895614624, "rewards/VideoAccuracy/std": 0.49679577350616455, "step": 462, "train_speed(iter/s)": 0.01801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 799.0, "completions/mean_length": 509.8095397949219, "completions/min_length": 346.0, "entropy/max": 0.66015625, "entropy/mean": 0.4140625, "entropy/min": 0.251953125, "epoch": 0.463, "grad_norm": 1.2944413633743106, "kl": 0.302734375, "learning_rate": 1.1328854535702542e-06, "loss": 0.0030356289353221655, "memory(GiB)": 147.17, "reward": 1.8189336061477661, "reward_std": 0.32026880979537964, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5183867812156677, "rewards/EvidenceHallucination/std": 0.4302096664905548, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.4305444955825806, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6485896110534668, "rewards/VideoAccuracy/std": 0.3347703218460083, "step": 463, "train_speed(iter/s)": 0.018013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 681.0, "completions/mean_length": 478.6428527832031, "completions/min_length": 312.0, "entropy/max": 0.53125, "entropy/mean": 0.33203125, "entropy/min": 0.2080078125, "epoch": 0.464, "grad_norm": 1.2620868621492844, "kl": 0.337890625, "learning_rate": 1.1297396068102017e-06, "loss": 0.0034277853555977345, "memory(GiB)": 147.17, "reward": 2.0954244136810303, "reward_std": 0.13253745436668396, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.47420939803123474, "rewards/EvidenceHallucination/std": 0.4610733687877655, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 1.2970528602600098, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8339158296585083, "rewards/VideoAccuracy/std": 0.5612301826477051, "step": 464, "train_speed(iter/s)": 0.01802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/mean_length": 600.357177734375, "completions/min_length": 287.0, "entropy/max": 1.546875, "entropy/mean": 0.392578125, "entropy/min": 0.07373046875, "epoch": 0.465, "grad_norm": 0.9052743145971144, "kl": 0.2431640625, "learning_rate": 1.1265924535737492e-06, "loss": 0.0025208794977515936, "memory(GiB)": 147.17, "reward": 1.5555534362792969, "reward_std": 0.19387571513652802, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16526667773723602, "rewards/EvidenceHallucination/std": 0.31730732321739197, "rewards/Evidence_Num_Record/mean": 5.5714287757873535, "rewards/Evidence_Num_Record/std": 2.188025712966919, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4558333456516266, "rewards/VideoAccuracy/std": 0.4882560968399048, "step": 465, "train_speed(iter/s)": 0.018016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 992.0, "completions/mean_length": 538.4761962890625, "completions/min_length": 308.0, "entropy/max": 0.62890625, "entropy/mean": 0.3828125, "entropy/min": 0.265625, "epoch": 0.466, "grad_norm": 1.1128596821312087, "kl": 0.298828125, "learning_rate": 1.1234440255526948e-06, "loss": 0.0030189414974302053, "memory(GiB)": 147.17, "reward": 1.7855602502822876, "reward_std": 0.2355557084083557, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6041164398193359, "rewards/EvidenceHallucination/std": 0.44853171706199646, "rewards/Evidence_Num_Record/mean": 5.1666669845581055, "rewards/Evidence_Num_Record/std": 2.1630678176879883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6314036250114441, "rewards/VideoAccuracy/std": 0.40444111824035645, "step": 466, "train_speed(iter/s)": 0.018015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 666.0, "completions/mean_length": 462.952392578125, "completions/min_length": 314.0, "entropy/max": 0.5234375, "entropy/mean": 0.36328125, "entropy/min": 0.240234375, "epoch": 0.467, "grad_norm": 1.1542888484351184, "kl": 0.322265625, "learning_rate": 1.1202943544516735e-06, "loss": 0.003247791901230812, "memory(GiB)": 147.17, "reward": 2.08418869972229, "reward_std": 0.15974998474121094, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6274665594100952, "rewards/EvidenceHallucination/std": 0.440139502286911, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.0722993612289429, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8586952686309814, "rewards/VideoAccuracy/std": 0.5073198676109314, "step": 467, "train_speed(iter/s)": 0.017993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1546.0, "completions/mean_length": 541.40478515625, "completions/min_length": 270.0, "entropy/max": 0.6875, "entropy/mean": 0.328125, "entropy/min": 0.12451171875, "epoch": 0.468, "grad_norm": 1.1089900123712406, "kl": 0.267578125, "learning_rate": 1.1171434719878383e-06, "loss": 0.00274701789021492, "memory(GiB)": 147.17, "reward": 1.9164142608642578, "reward_std": 0.12607529759407043, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45135653018951416, "rewards/EvidenceHallucination/std": 0.38880568742752075, "rewards/Evidence_Num_Record/mean": 5.285714149475098, "rewards/Evidence_Num_Record/std": 3.344348907470703, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.692809522151947, "rewards/VideoAccuracy/std": 0.5157350301742554, "step": 468, "train_speed(iter/s)": 0.01799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 952.0, "completions/mean_length": 554.0714111328125, "completions/min_length": 402.0, "entropy/max": 0.5546875, "entropy/mean": 0.3828125, "entropy/min": 0.224609375, "epoch": 0.469, "grad_norm": 1.2050191357372941, "kl": 0.287109375, "learning_rate": 1.1139914098905405e-06, "loss": 0.0028876017313450575, "memory(GiB)": 147.17, "reward": 1.5697916746139526, "reward_std": 0.3003442883491516, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3716760575771332, "rewards/EvidenceHallucination/std": 0.4212680459022522, "rewards/Evidence_Num_Record/mean": 5.642857074737549, "rewards/Evidence_Num_Record/std": 1.7918709516525269, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.46212318539619446, "rewards/VideoAccuracy/std": 0.4985504448413849, "step": 469, "train_speed(iter/s)": 0.017994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 975.0, "completions/mean_length": 500.6428527832031, "completions/min_length": 318.0, "entropy/max": 0.578125, "entropy/mean": 0.392578125, "entropy/min": 0.2021484375, "epoch": 0.47, "grad_norm": 1.2681006412044444, "kl": 0.318359375, "learning_rate": 1.110838199901011e-06, "loss": 0.003217041026800871, "memory(GiB)": 147.17, "reward": 1.4075568914413452, "reward_std": 0.23479227721691132, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2022327035665512, "rewards/EvidenceHallucination/std": 0.39306485652923584, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 2.091407060623169, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500061869621277, "rewards/VideoAccuracy/mean": 0.3147293031215668, "rewards/VideoAccuracy/std": 0.4137459993362427, "step": 470, "train_speed(iter/s)": 0.018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 839.0, "completions/mean_length": 524.5238037109375, "completions/min_length": 345.0, "entropy/max": 0.50390625, "entropy/mean": 0.330078125, "entropy/min": 0.1484375, "epoch": 0.471, "grad_norm": 0.9793066015071863, "kl": 0.275390625, "learning_rate": 1.107683873772039e-06, "loss": 0.0029692240059375763, "memory(GiB)": 147.17, "reward": 1.9420217275619507, "reward_std": 0.08886364102363586, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41147086024284363, "rewards/EvidenceHallucination/std": 0.4166224002838135, "rewards/Evidence_Num_Record/mean": 4.547619342803955, "rewards/Evidence_Num_Record/std": 1.4684051275253296, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.6644895076751709, "rewards/VideoAccuracy/std": 0.44928592443466187, "step": 471, "train_speed(iter/s)": 0.018001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 980.0, "completions/mean_length": 542.452392578125, "completions/min_length": 361.0, "entropy/max": 0.66015625, "entropy/mean": 0.390625, "entropy/min": 0.255859375, "epoch": 0.472, "grad_norm": 1.1380247355250153, "kl": 0.291015625, "learning_rate": 1.1045284632676535e-06, "loss": 0.00299941748380661, "memory(GiB)": 147.17, "reward": 1.785868763923645, "reward_std": 0.012286361306905746, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5960100889205933, "rewards/EvidenceHallucination/std": 0.4448285698890686, "rewards/Evidence_Num_Record/mean": 5.190476417541504, "rewards/Evidence_Num_Record/std": 1.5654516220092773, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6666666865348816, "rewards/VideoAccuracy/std": 0.47711876034736633, "step": 472, "train_speed(iter/s)": 0.01797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 785.0, "completions/mean_length": 526.3095092773438, "completions/min_length": 373.0, "entropy/max": 0.60546875, "entropy/mean": 0.421875, "entropy/min": 0.25, "epoch": 0.473, "grad_norm": 1.1189761887851029, "kl": 0.275390625, "learning_rate": 1.1013720001628034e-06, "loss": 0.002974391682073474, "memory(GiB)": 147.17, "reward": 1.4630788564682007, "reward_std": 0.4351825416088104, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.25441774725914, "rewards/EvidenceHallucination/std": 0.3907027244567871, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 1.311965823173523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.35029053688049316, "rewards/VideoAccuracy/std": 0.4316782057285309, "step": 473, "train_speed(iter/s)": 0.017975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1089.0, "completions/mean_length": 532.1190795898438, "completions/min_length": 335.0, "entropy/max": 0.48828125, "entropy/mean": 0.3515625, "entropy/min": 0.193359375, "epoch": 0.474, "grad_norm": 1.2260793976716424, "kl": 0.302734375, "learning_rate": 1.0982145162430371e-06, "loss": 0.0030711570288985968, "memory(GiB)": 147.17, "reward": 1.9970827102661133, "reward_std": 0.22381845116615295, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48039278388023376, "rewards/EvidenceHallucination/std": 0.4535634219646454, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 1.5970356464385986, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7343373894691467, "rewards/VideoAccuracy/std": 0.5738323926925659, "step": 474, "train_speed(iter/s)": 0.01797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/mean_length": 588.2142944335938, "completions/min_length": 330.0, "entropy/max": 1.2109375, "entropy/mean": 0.3046875, "entropy/min": 0.1376953125, "epoch": 0.475, "grad_norm": 2.7216745852020106, "kl": 0.52734375, "learning_rate": 1.0950560433041825e-06, "loss": 0.0056946794502437115, "memory(GiB)": 147.17, "reward": 2.2821545600891113, "reward_std": 0.06225450336933136, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6848945021629333, "rewards/EvidenceHallucination/std": 0.26604753732681274, "rewards/Evidence_Num_Record/mean": 5.857142925262451, "rewards/Evidence_Num_Record/std": 3.2277109622955322, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 1.0785090923309326, "rewards/VideoAccuracy/std": 0.12712201476097107, "step": 475, "train_speed(iter/s)": 0.017961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 842.0, "completions/mean_length": 538.1190795898438, "completions/min_length": 373.0, "entropy/max": 0.5703125, "entropy/mean": 0.39453125, "entropy/min": 0.2197265625, "epoch": 0.476, "grad_norm": 1.338200222826453, "kl": 0.287109375, "learning_rate": 1.0918966131520276e-06, "loss": 0.002909306436777115, "memory(GiB)": 147.17, "reward": 1.718570351600647, "reward_std": 0.2557729482650757, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4614545404911041, "rewards/EvidenceHallucination/std": 0.4442642629146576, "rewards/Evidence_Num_Record/mean": 4.761904716491699, "rewards/Evidence_Num_Record/std": 1.1647155284881592, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5643746256828308, "rewards/VideoAccuracy/std": 0.4282829463481903, "step": 476, "train_speed(iter/s)": 0.017959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 627.7619018554688, "completions/min_length": 367.0, "entropy/max": 0.5390625, "entropy/mean": 0.34765625, "entropy/min": 0.08056640625, "epoch": 0.477, "grad_norm": 1.1502767846006685, "kl": 0.275390625, "learning_rate": 1.0887362576019981e-06, "loss": 0.002875441685318947, "memory(GiB)": 147.17, "reward": 2.072842836380005, "reward_std": 0.15147219598293304, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.701572835445404, "rewards/EvidenceHallucination/std": 0.3567623794078827, "rewards/Evidence_Num_Record/mean": 5.833333492279053, "rewards/Evidence_Num_Record/std": 5.3278679847717285, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8444330096244812, "rewards/VideoAccuracy/std": 0.42393815517425537, "step": 477, "train_speed(iter/s)": 0.017935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/mean_length": 547.1904907226562, "completions/min_length": 305.0, "entropy/max": 0.70703125, "entropy/mean": 0.31640625, "entropy/min": 0.1376953125, "epoch": 0.478, "grad_norm": 1.0997246913390766, "kl": 0.25390625, "learning_rate": 1.0855750084788398e-06, "loss": 0.00258713960647583, "memory(GiB)": 147.17, "reward": 2.1575942039489746, "reward_std": 0.19384567439556122, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5576894283294678, "rewards/EvidenceHallucination/std": 0.4132033884525299, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 1.8033393621444702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9127230048179626, "rewards/VideoAccuracy/std": 0.4475727081298828, "step": 478, "train_speed(iter/s)": 0.017937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 573.5238037109375, "completions/min_length": 382.0, "entropy/max": 0.75, "entropy/mean": 0.3828125, "entropy/min": 0.1015625, "epoch": 0.479, "grad_norm": 1.0940228515662154, "kl": 0.255859375, "learning_rate": 1.0824128976162962e-06, "loss": 0.002665138803422451, "memory(GiB)": 147.17, "reward": 1.4639655351638794, "reward_std": 0.19348707795143127, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30238524079322815, "rewards/EvidenceHallucination/std": 0.40099501609802246, "rewards/Evidence_Num_Record/mean": 5.404761791229248, "rewards/Evidence_Num_Record/std": 4.633175849914551, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.38205984234809875, "rewards/VideoAccuracy/std": 0.49067986011505127, "step": 479, "train_speed(iter/s)": 0.017913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 817.0, "completions/mean_length": 525.6666870117188, "completions/min_length": 370.0, "entropy/max": 0.51171875, "entropy/mean": 0.388671875, "entropy/min": 0.25, "epoch": 0.48, "grad_norm": 1.2701543867108012, "kl": 0.265625, "learning_rate": 1.0792499568567884e-06, "loss": 0.0026680571027100086, "memory(GiB)": 147.17, "reward": 1.519570231437683, "reward_std": 0.22163185477256775, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2485884577035904, "rewards/EvidenceHallucination/std": 0.42518365383148193, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.6709585189819336, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.40794771909713745, "rewards/VideoAccuracy/std": 0.508557140827179, "step": 480, "train_speed(iter/s)": 0.01792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 813.0, "completions/mean_length": 513.9761962890625, "completions/min_length": 369.0, "entropy/max": 0.609375, "entropy/mean": 0.31640625, "entropy/min": 0.12109375, "epoch": 0.481, "grad_norm": 1.2578409875339809, "kl": 0.263671875, "learning_rate": 1.076086218051095e-06, "loss": 0.0026590488851070404, "memory(GiB)": 147.17, "reward": 2.2940688133239746, "reward_std": 0.1663028597831726, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6069757342338562, "rewards/EvidenceHallucination/std": 0.4343172311782837, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 0.9927144050598145, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9774355292320251, "rewards/VideoAccuracy/std": 0.33955731987953186, "step": 481, "train_speed(iter/s)": 0.017919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/mean_length": 618.0238037109375, "completions/min_length": 317.0, "entropy/max": 1.34375, "entropy/mean": 0.455078125, "entropy/min": 0.251953125, "epoch": 0.482, "grad_norm": 0.8808210235719656, "kl": 0.2275390625, "learning_rate": 1.0729217130580309e-06, "loss": 0.002350968774408102, "memory(GiB)": 147.17, "reward": 1.387406587600708, "reward_std": 0.24863937497138977, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27036601305007935, "rewards/EvidenceHallucination/std": 0.39469394087791443, "rewards/Evidence_Num_Record/mean": 6.476190567016602, "rewards/Evidence_Num_Record/std": 4.043893814086914, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3333333432674408, "rewards/VideoAccuracy/std": 0.47711870074272156, "step": 482, "train_speed(iter/s)": 0.017909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/mean_length": 594.8333740234375, "completions/min_length": 368.0, "entropy/max": 0.6875, "entropy/mean": 0.3828125, "entropy/min": 0.23828125, "epoch": 0.483, "grad_norm": 1.115115173966916, "kl": 0.271484375, "learning_rate": 1.069756473744125e-06, "loss": 0.0027435943484306335, "memory(GiB)": 147.17, "reward": 1.4424556493759155, "reward_std": 0.2692454755306244, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2950797379016876, "rewards/EvidenceHallucination/std": 0.4263550043106079, "rewards/Evidence_Num_Record/mean": 5.5, "rewards/Evidence_Num_Record/std": 1.7976950407028198, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.3548682928085327, "rewards/VideoAccuracy/std": 0.41278472542762756, "step": 483, "train_speed(iter/s)": 0.017912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 784.0, "completions/mean_length": 480.6428527832031, "completions/min_length": 338.0, "entropy/max": 0.5, "entropy/mean": 0.341796875, "entropy/min": 0.1630859375, "epoch": 0.484, "grad_norm": 1.2047242245282004, "kl": 0.302734375, "learning_rate": 1.066590531983304e-06, "loss": 0.0030420708935707808, "memory(GiB)": 147.17, "reward": 2.144254207611084, "reward_std": 0.12368159741163254, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5195605754852295, "rewards/EvidenceHallucination/std": 0.38605642318725586, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.2176117897033691, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.8784372210502625, "rewards/VideoAccuracy/std": 0.46715590357780457, "step": 484, "train_speed(iter/s)": 0.017913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/mean_length": 587.7619018554688, "completions/min_length": 310.0, "entropy/max": 0.6484375, "entropy/mean": 0.357421875, "entropy/min": 0.150390625, "epoch": 0.485, "grad_norm": 1.0202696321221674, "kl": 0.2275390625, "learning_rate": 1.0634239196565644e-06, "loss": 0.00234436197206378, "memory(GiB)": 147.17, "reward": 1.95667564868927, "reward_std": 0.18724535405635834, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.520356297492981, "rewards/EvidenceHallucination/std": 0.39924415946006775, "rewards/Evidence_Num_Record/mean": 5.761904716491699, "rewards/Evidence_Num_Record/std": 2.8353991508483887, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7859376668930054, "rewards/VideoAccuracy/std": 0.4797806143760681, "step": 485, "train_speed(iter/s)": 0.017905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1612.0, "completions/mean_length": 582.547607421875, "completions/min_length": 320.0, "entropy/max": 0.6328125, "entropy/mean": 0.359375, "entropy/min": 0.158203125, "epoch": 0.486, "grad_norm": 1.2034500202061464, "kl": 0.2451171875, "learning_rate": 1.0602566686516584e-06, "loss": 0.0025604632683098316, "memory(GiB)": 147.17, "reward": 1.624455451965332, "reward_std": 0.11654912680387497, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3282482922077179, "rewards/EvidenceHallucination/std": 0.3944683372974396, "rewards/Evidence_Num_Record/mean": 5.1666669845581055, "rewards/Evidence_Num_Record/std": 2.713261604309082, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5254723429679871, "rewards/VideoAccuracy/std": 0.4686095714569092, "step": 486, "train_speed(iter/s)": 0.017893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 610.1904907226562, "completions/min_length": 335.0, "entropy/max": 0.7265625, "entropy/mean": 0.390625, "entropy/min": 0.16015625, "epoch": 0.487, "grad_norm": 1.1250653949961946, "kl": 0.265625, "learning_rate": 1.057088810862768e-06, "loss": 0.0027342557441443205, "memory(GiB)": 147.17, "reward": 1.7513415813446045, "reward_std": 0.28683042526245117, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4083775281906128, "rewards/EvidenceHallucination/std": 0.4645242989063263, "rewards/Evidence_Num_Record/mean": 6.142857074737549, "rewards/Evidence_Num_Record/std": 5.973810195922852, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.5815707445144653, "rewards/VideoAccuracy/std": 0.6242651343345642, "step": 487, "train_speed(iter/s)": 0.017884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1996.0, "completions/mean_length": 640.5714111328125, "completions/min_length": 379.0, "entropy/max": 0.5, "entropy/mean": 0.302734375, "entropy/min": 0.1669921875, "epoch": 0.488, "grad_norm": 1.0456907944976346, "kl": 0.2333984375, "learning_rate": 1.053920378190186e-06, "loss": 0.0024078013375401497, "memory(GiB)": 147.17, "reward": 1.766273856163025, "reward_std": 0.28851383924484253, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.320260226726532, "rewards/EvidenceHallucination/std": 0.40128740668296814, "rewards/Evidence_Num_Record/mean": 6.214285850524902, "rewards/Evidence_Num_Record/std": 5.224406719207764, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5688884854316711, "rewards/VideoAccuracy/std": 0.4637153446674347, "step": 488, "train_speed(iter/s)": 0.017875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2289.0, "completions/mean_length": 630.0714111328125, "completions/min_length": 373.0, "entropy/max": 0.65234375, "entropy/mean": 0.392578125, "entropy/min": 0.1455078125, "epoch": 0.489, "grad_norm": 1.1567690080526456, "kl": 0.2265625, "learning_rate": 1.0507514025399942e-06, "loss": 0.002367014531046152, "memory(GiB)": 147.17, "reward": 1.8700870275497437, "reward_std": 0.1770918369293213, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5409119129180908, "rewards/EvidenceHallucination/std": 0.35729101300239563, "rewards/Evidence_Num_Record/mean": 5.404761791229248, "rewards/Evidence_Num_Record/std": 2.164677858352661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.761904776096344, "rewards/VideoAccuracy/std": 0.43108054995536804, "step": 489, "train_speed(iter/s)": 0.017858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 988.0, "completions/mean_length": 495.4285888671875, "completions/min_length": 325.0, "entropy/max": 0.52734375, "entropy/mean": 0.3984375, "entropy/min": 0.19921875, "epoch": 0.49, "grad_norm": 1.065173367019639, "kl": 0.279296875, "learning_rate": 1.0475819158237424e-06, "loss": 0.0028036325238645077, "memory(GiB)": 147.17, "reward": 1.3710325956344604, "reward_std": 0.24134787917137146, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23277650773525238, "rewards/EvidenceHallucination/std": 0.3827642798423767, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.7708439826965332, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.2578105926513672, "rewards/VideoAccuracy/std": 0.31216996908187866, "step": 490, "train_speed(iter/s)": 0.017838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/mean_length": 474.0952453613281, "completions/min_length": 295.0, "entropy/max": 0.5078125, "entropy/mean": 0.318359375, "entropy/min": 0.12890625, "epoch": 0.491, "grad_norm": 1.0706580417180322, "kl": 0.26953125, "learning_rate": 1.0444119499581261e-06, "loss": 0.0027219271287322044, "memory(GiB)": 147.17, "reward": 2.362217426300049, "reward_std": 0.09849338233470917, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5614845156669617, "rewards/EvidenceHallucination/std": 0.41747772693634033, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 0.9830148816108704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0499204397201538, "rewards/VideoAccuracy/std": 0.36109185218811035, "step": 491, "train_speed(iter/s)": 0.017842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2059.0, "completions/mean_length": 746.5238037109375, "completions/min_length": 429.0, "entropy/max": 1.5859375, "entropy/mean": 0.41015625, "entropy/min": 0.1630859375, "epoch": 0.492, "grad_norm": 0.931213143371818, "kl": 0.21875, "learning_rate": 1.041241536864667e-06, "loss": 0.002338796854019165, "memory(GiB)": 147.17, "reward": 1.6701347827911377, "reward_std": 0.11329877376556396, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4935309588909149, "rewards/EvidenceHallucination/std": 0.4419654309749603, "rewards/Evidence_Num_Record/mean": 7.904762268066406, "rewards/Evidence_Num_Record/std": 4.0713725090026855, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5714285969734192, "rewards/VideoAccuracy/std": 0.5008703470230103, "step": 492, "train_speed(iter/s)": 0.01782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 850.0, "completions/mean_length": 518.2619018554688, "completions/min_length": 362.0, "entropy/max": 0.55859375, "entropy/mean": 0.408203125, "entropy/min": 0.224609375, "epoch": 0.493, "grad_norm": 1.178755812522848, "kl": 0.27734375, "learning_rate": 1.03807070846939e-06, "loss": 0.002808667253702879, "memory(GiB)": 147.17, "reward": 1.5074771642684937, "reward_std": 0.15796434879302979, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2707264721393585, "rewards/EvidenceHallucination/std": 0.405273973941803, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.4256649017333984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.41999852657318115, "rewards/VideoAccuracy/std": 0.4554305672645569, "step": 493, "train_speed(iter/s)": 0.017798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1074.0, "completions/mean_length": 543.0714111328125, "completions/min_length": 286.0, "entropy/max": 0.515625, "entropy/mean": 0.32421875, "entropy/min": 0.1435546875, "epoch": 0.494, "grad_norm": 1.0798772311424434, "kl": 0.279296875, "learning_rate": 1.034899496702501e-06, "loss": 0.0028433247935026884, "memory(GiB)": 147.17, "reward": 2.1295299530029297, "reward_std": 0.2256758213043213, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4543311297893524, "rewards/EvidenceHallucination/std": 0.46849706768989563, "rewards/Evidence_Num_Record/mean": 4.857142925262451, "rewards/Evidence_Num_Record/std": 1.6315699815750122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8719971179962158, "rewards/VideoAccuracy/std": 0.5661816596984863, "step": 494, "train_speed(iter/s)": 0.017803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1678.0, "completions/mean_length": 596.5238037109375, "completions/min_length": 270.0, "entropy/max": 2.265625, "entropy/mean": 0.447265625, "entropy/min": 0.1435546875, "epoch": 0.495, "grad_norm": 1.0783150340497494, "kl": 0.24609375, "learning_rate": 1.0317279334980677e-06, "loss": 0.0025517649482935667, "memory(GiB)": 147.17, "reward": 1.7263377904891968, "reward_std": 0.16748467087745667, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3050219416618347, "rewards/EvidenceHallucination/std": 0.38823720812797546, "rewards/Evidence_Num_Record/mean": 5.928571701049805, "rewards/Evidence_Num_Record/std": 3.4738948345184326, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.5653334856033325, "rewards/VideoAccuracy/std": 0.4182286858558655, "step": 495, "train_speed(iter/s)": 0.017799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1138.0, "completions/mean_length": 542.7857055664062, "completions/min_length": 384.0, "entropy/max": 0.6171875, "entropy/mean": 0.365234375, "entropy/min": 0.2275390625, "epoch": 0.496, "grad_norm": 1.1036716393029964, "kl": 0.259765625, "learning_rate": 1.0285560507936961e-06, "loss": 0.0026476779021322727, "memory(GiB)": 147.17, "reward": 1.4483938217163086, "reward_std": 0.36713075637817383, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2876743972301483, "rewards/EvidenceHallucination/std": 0.43775710463523865, "rewards/Evidence_Num_Record/mean": 5.285714149475098, "rewards/Evidence_Num_Record/std": 2.3919143676757812, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3575255572795868, "rewards/VideoAccuracy/std": 0.4473797082901001, "step": 496, "train_speed(iter/s)": 0.01779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1597.0, "completions/mean_length": 579.0, "completions/min_length": 345.0, "entropy/max": 0.64453125, "entropy/mean": 0.34375, "entropy/min": 0.13671875, "epoch": 0.497, "grad_norm": 1.179711608738581, "kl": 0.275390625, "learning_rate": 1.0253838805302104e-06, "loss": 0.0028095985762774944, "memory(GiB)": 147.17, "reward": 2.179518699645996, "reward_std": 0.2409399449825287, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6658817529678345, "rewards/EvidenceHallucination/std": 0.39015546441078186, "rewards/Evidence_Num_Record/mean": 5.38095235824585, "rewards/Evidence_Num_Record/std": 3.1541872024536133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.9463424682617188, "rewards/VideoAccuracy/std": 0.3125475347042084, "step": 497, "train_speed(iter/s)": 0.017781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1561.0, "completions/mean_length": 542.8809814453125, "completions/min_length": 307.0, "entropy/max": 0.66015625, "entropy/mean": 0.306640625, "entropy/min": 0.1513671875, "epoch": 0.498, "grad_norm": 0.9690807003077019, "kl": 0.25, "learning_rate": 1.0222114546513293e-06, "loss": 0.002958999713882804, "memory(GiB)": 147.17, "reward": 2.139566659927368, "reward_std": 0.2018376588821411, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.47256433963775635, "rewards/EvidenceHallucination/std": 0.4096042811870575, "rewards/Evidence_Num_Record/mean": 5.357142925262451, "rewards/Evidence_Num_Record/std": 4.071443557739258, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9117205739021301, "rewards/VideoAccuracy/std": 0.3327580392360687, "step": 498, "train_speed(iter/s)": 0.017779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.047619047619047616, "completions/max_length": 2625.0, "completions/mean_length": 669.0, "completions/min_length": 417.0, "entropy/max": 0.71484375, "entropy/mean": 0.3203125, "entropy/min": 0.08935546875, "epoch": 0.499, "grad_norm": 0.9967579922646225, "kl": 0.2294921875, "learning_rate": 1.0190388051033464e-06, "loss": 0.0024905321188271046, "memory(GiB)": 147.17, "reward": 1.6246230602264404, "reward_std": 0.24624596536159515, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37271884083747864, "rewards/EvidenceHallucination/std": 0.3991992175579071, "rewards/Evidence_Num_Record/mean": 5.5714287757873535, "rewards/Evidence_Num_Record/std": 1.7962408065795898, "rewards/Format/mean": 0.9523809552192688, "rewards/Format/std": 0.21554027497768402, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5405555963516235, "rewards/VideoAccuracy/std": 0.47258710861206055, "step": 499, "train_speed(iter/s)": 0.017751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2351.0, "completions/mean_length": 505.3095397949219, "completions/min_length": 257.0, "entropy/max": 0.609375, "entropy/mean": 0.390625, "entropy/min": 0.08447265625, "epoch": 0.5, "grad_norm": 1.2400215598471565, "kl": 0.294921875, "learning_rate": 1.015865963834808e-06, "loss": 0.003047055331990123, "memory(GiB)": 147.17, "reward": 1.3536604642868042, "reward_std": 0.3210704028606415, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.12089216709136963, "rewards/EvidenceHallucination/std": 0.2773915231227875, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 1.8543041944503784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.2628153860569, "rewards/VideoAccuracy/std": 0.3835754096508026, "step": 500, "train_speed(iter/s)": 0.017751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 760.0, "completions/mean_length": 479.5476379394531, "completions/min_length": 295.0, "entropy/max": 0.80859375, "entropy/mean": 0.345703125, "entropy/min": 0.08544921875, "epoch": 0.501, "grad_norm": 1.0692538501971287, "kl": 0.259765625, "learning_rate": 1.0126929627961895e-06, "loss": 0.0026359502226114273, "memory(GiB)": 147.17, "reward": 1.9519094228744507, "reward_std": 0.2122296541929245, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33205512166023254, "rewards/EvidenceHallucination/std": 0.4195026159286499, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 1.5537225008010864, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6854982972145081, "rewards/VideoAccuracy/std": 0.4974336624145508, "step": 501, "train_speed(iter/s)": 0.017725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2112.0, "completions/mean_length": 731.7857055664062, "completions/min_length": 376.0, "entropy/max": 1.390625, "entropy/mean": 0.373046875, "entropy/min": 0.123046875, "epoch": 0.502, "grad_norm": 0.930144520848011, "kl": 0.19921875, "learning_rate": 1.0095198339395767e-06, "loss": 0.0021102442406117916, "memory(GiB)": 147.17, "reward": 1.5032862424850464, "reward_std": 0.25732943415641785, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.274224191904068, "rewards/EvidenceHallucination/std": 0.38205230236053467, "rewards/Evidence_Num_Record/mean": 7.642857074737549, "rewards/Evidence_Num_Record/std": 5.728923320770264, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.41510799527168274, "rewards/VideoAccuracy/std": 0.4720475673675537, "step": 502, "train_speed(iter/s)": 0.017707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1054.0, "completions/mean_length": 549.0952758789062, "completions/min_length": 378.0, "entropy/max": 0.8203125, "entropy/mean": 0.412109375, "entropy/min": 0.16796875, "epoch": 0.503, "grad_norm": 1.1059638862010952, "kl": 0.263671875, "learning_rate": 1.006346609218342e-06, "loss": 0.002717230934649706, "memory(GiB)": 147.17, "reward": 1.4658093452453613, "reward_std": 0.3106657564640045, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30617669224739075, "rewards/EvidenceHallucination/std": 0.39000555872917175, "rewards/Evidence_Num_Record/mean": 5.6666669845581055, "rewards/Evidence_Num_Record/std": 2.3957958221435547, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.37124067544937134, "rewards/VideoAccuracy/std": 0.43431395292282104, "step": 503, "train_speed(iter/s)": 0.01771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 621.0, "completions/mean_length": 459.3333435058594, "completions/min_length": 291.0, "entropy/max": 0.6015625, "entropy/mean": 0.337890625, "entropy/min": 0.1513671875, "epoch": 0.504, "grad_norm": 1.2113299240988997, "kl": 0.302734375, "learning_rate": 1.0031733205868223e-06, "loss": 0.0030504553578794003, "memory(GiB)": 147.17, "reward": 1.9867899417877197, "reward_std": 0.23987329006195068, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5109496712684631, "rewards/EvidenceHallucination/std": 0.45937153697013855, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.8665281534194946, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.717933177947998, "rewards/VideoAccuracy/std": 0.6398183107376099, "step": 504, "train_speed(iter/s)": 0.017714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1561.0, "completions/mean_length": 556.7142944335938, "completions/min_length": 282.0, "entropy/max": 0.671875, "entropy/mean": 0.302734375, "entropy/min": 0.140625, "epoch": 0.505, "grad_norm": 1.1487524994760716, "kl": 0.2294921875, "learning_rate": 1e-06, "loss": 0.0023781340569257736, "memory(GiB)": 147.17, "reward": 1.9254145622253418, "reward_std": 0.23551908135414124, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42413070797920227, "rewards/EvidenceHallucination/std": 0.4278799891471863, "rewards/Evidence_Num_Record/mean": 5.976190567016602, "rewards/Evidence_Num_Record/std": 4.386721134185791, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.740588366985321, "rewards/VideoAccuracy/std": 0.3732606768608093, "step": 505, "train_speed(iter/s)": 0.017705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2116.0, "completions/mean_length": 698.1190795898438, "completions/min_length": 339.0, "entropy/max": 0.75, "entropy/mean": 0.3515625, "entropy/min": 0.119140625, "epoch": 0.506, "grad_norm": 1.02628192816372, "kl": 0.2373046875, "learning_rate": 9.968266794131776e-07, "loss": 0.002447321079671383, "memory(GiB)": 147.17, "reward": 1.527622938156128, "reward_std": 0.1635744571685791, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.17251500487327576, "rewards/EvidenceHallucination/std": 0.32047125697135925, "rewards/Evidence_Num_Record/mean": 6.976190567016602, "rewards/Evidence_Num_Record/std": 6.387824058532715, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.45978668332099915, "rewards/VideoAccuracy/std": 0.4627102017402649, "step": 506, "train_speed(iter/s)": 0.017683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1757.0, "completions/mean_length": 569.9285888671875, "completions/min_length": 358.0, "entropy/max": 1.0703125, "entropy/mean": 0.37109375, "entropy/min": 0.1279296875, "epoch": 0.507, "grad_norm": 1.0982760174066724, "kl": 0.267578125, "learning_rate": 9.936533907816581e-07, "loss": 0.0027626955416053534, "memory(GiB)": 147.17, "reward": 1.7680379152297974, "reward_std": 0.3319416642189026, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3042178452014923, "rewards/EvidenceHallucination/std": 0.4273426830768585, "rewards/Evidence_Num_Record/mean": 5.595238208770752, "rewards/Evidence_Num_Record/std": 4.219926834106445, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6071943044662476, "rewards/VideoAccuracy/std": 0.5861407518386841, "step": 507, "train_speed(iter/s)": 0.017685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/mean_length": 548.3333740234375, "completions/min_length": 297.0, "entropy/max": 0.9453125, "entropy/mean": 0.275390625, "entropy/min": 0.1142578125, "epoch": 0.508, "grad_norm": 1.0287272845778204, "kl": 0.2158203125, "learning_rate": 9.904801660604232e-07, "loss": 0.002213830128312111, "memory(GiB)": 147.19, "reward": 2.122735023498535, "reward_std": 0.17406874895095825, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5652883052825928, "rewards/EvidenceHallucination/std": 0.3997299373149872, "rewards/Evidence_Num_Record/mean": 4.904761791229248, "rewards/Evidence_Num_Record/std": 2.00984787940979, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5952380895614624, "rewards/HonestTime/std": 0.49679574370384216, "rewards/VideoAccuracy/mean": 0.890629768371582, "rewards/VideoAccuracy/std": 0.47267624735832214, "step": 508, "train_speed(iter/s)": 0.017689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 978.0, "completions/mean_length": 532.9761962890625, "completions/min_length": 339.0, "entropy/max": 2.03125, "entropy/mean": 0.435546875, "entropy/min": 0.248046875, "epoch": 0.509, "grad_norm": 0.9878512105595498, "kl": 0.2392578125, "learning_rate": 9.873070372038104e-07, "loss": 0.002451932290568948, "memory(GiB)": 147.2, "reward": 1.5864818096160889, "reward_std": 0.10923613607883453, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3566635251045227, "rewards/EvidenceHallucination/std": 0.4345127046108246, "rewards/Evidence_Num_Record/mean": 5.047619342803955, "rewards/Evidence_Num_Record/std": 1.9871830940246582, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.48181581497192383, "rewards/VideoAccuracy/std": 0.4758481979370117, "step": 509, "train_speed(iter/s)": 0.017663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 736.0, "completions/mean_length": 496.9761962890625, "completions/min_length": 324.0, "entropy/max": 0.5234375, "entropy/mean": 0.380859375, "entropy/min": 0.263671875, "epoch": 0.51, "grad_norm": 1.0558917088281818, "kl": 0.275390625, "learning_rate": 9.84134036165192e-07, "loss": 0.0027638720348477364, "memory(GiB)": 147.2, "reward": 1.5433754920959473, "reward_std": 0.17969533801078796, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31144264340400696, "rewards/EvidenceHallucination/std": 0.4260638356208801, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.2308934926986694, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4144202768802643, "rewards/VideoAccuracy/std": 0.44290584325790405, "step": 510, "train_speed(iter/s)": 0.017667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/mean_length": 530.357177734375, "completions/min_length": 280.0, "entropy/max": 0.62890625, "entropy/mean": 0.2451171875, "entropy/min": 0.072265625, "epoch": 0.511, "grad_norm": 0.9613440294327296, "kl": 0.2265625, "learning_rate": 9.809611948966533e-07, "loss": 0.0024141266476362944, "memory(GiB)": 147.2, "reward": 2.5995254516601562, "reward_std": 0.0887753814458847, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6758074760437012, "rewards/EvidenceHallucination/std": 0.34982067346572876, "rewards/Evidence_Num_Record/mean": 4.88095235824585, "rewards/Evidence_Num_Record/std": 4.139060974121094, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.2643641233444214, "rewards/VideoAccuracy/std": 0.19649749994277954, "step": 511, "train_speed(iter/s)": 0.017652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1766.0, "completions/mean_length": 693.7142944335938, "completions/min_length": 451.0, "entropy/max": 0.98046875, "entropy/mean": 0.392578125, "entropy/min": 0.19921875, "epoch": 0.512, "grad_norm": 1.0210448250392559, "kl": 0.2177734375, "learning_rate": 9.777885453486706e-07, "loss": 0.00225750170648098, "memory(GiB)": 147.2, "reward": 1.4917278289794922, "reward_std": 0.17943572998046875, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2786943018436432, "rewards/EvidenceHallucination/std": 0.42361971735954285, "rewards/Evidence_Num_Record/mean": 6.904762268066406, "rewards/Evidence_Num_Record/std": 4.0713725090026855, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.4026556611061096, "rewards/VideoAccuracy/std": 0.4588158428668976, "step": 512, "train_speed(iter/s)": 0.017649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1935.0, "completions/mean_length": 558.5238037109375, "completions/min_length": 275.0, "entropy/max": 0.546875, "entropy/mean": 0.380859375, "entropy/min": 0.1240234375, "epoch": 0.513, "grad_norm": 0.9943325329837618, "kl": 0.2353515625, "learning_rate": 9.746161194697893e-07, "loss": 0.0024553914554417133, "memory(GiB)": 147.2, "reward": 1.500482201576233, "reward_std": 0.2269335240125656, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430334210395813, "rewards/EvidenceHallucination/mean": 0.2552522122859955, "rewards/EvidenceHallucination/std": 0.39110541343688965, "rewards/Evidence_Num_Record/mean": 5.238095283508301, "rewards/Evidence_Num_Record/std": 4.004932880401611, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.428003191947937, "rewards/VideoAccuracy/std": 0.4602835178375244, "step": 513, "train_speed(iter/s)": 0.017624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1042.0, "completions/mean_length": 525.5, "completions/min_length": 359.0, "entropy/max": 0.515625, "entropy/mean": 0.3125, "entropy/min": 0.2001953125, "epoch": 0.514, "grad_norm": 1.175745174405539, "kl": 0.267578125, "learning_rate": 9.714439492063038e-07, "loss": 0.002724867779761553, "memory(GiB)": 147.2, "reward": 2.0888025760650635, "reward_std": 0.16696274280548096, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4821758270263672, "rewards/EvidenceHallucination/std": 0.4331739544868469, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.9006997346878052, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8257007002830505, "rewards/VideoAccuracy/std": 0.4494706392288208, "step": 514, "train_speed(iter/s)": 0.017628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1148.0, "completions/mean_length": 579.452392578125, "completions/min_length": 332.0, "entropy/max": 0.6875, "entropy/mean": 0.314453125, "entropy/min": 0.1650390625, "epoch": 0.515, "grad_norm": 1.2232727483994883, "kl": 0.2265625, "learning_rate": 9.682720665019325e-07, "loss": 0.002336142584681511, "memory(GiB)": 147.2, "reward": 1.8602702617645264, "reward_std": 0.3273540437221527, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3450998067855835, "rewards/EvidenceHallucination/std": 0.3622949421405792, "rewards/Evidence_Num_Record/mean": 5.714285850524902, "rewards/Evidence_Num_Record/std": 3.535780429840088, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.7007741332054138, "rewards/VideoAccuracy/std": 0.39636293053627014, "step": 515, "train_speed(iter/s)": 0.017627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/mean_length": 567.6190795898438, "completions/min_length": 331.0, "entropy/max": 0.7421875, "entropy/mean": 0.39453125, "entropy/min": 0.1875, "epoch": 0.516, "grad_norm": 1.0995164355839078, "kl": 0.23046875, "learning_rate": 9.651005032974993e-07, "loss": 0.002389857079833746, "memory(GiB)": 147.2, "reward": 1.4132567644119263, "reward_std": 0.21354961395263672, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22768878936767578, "rewards/EvidenceHallucination/std": 0.3905121982097626, "rewards/Evidence_Num_Record/mean": 6.0714287757873535, "rewards/Evidence_Num_Record/std": 4.37505578994751, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.3391473889350891, "rewards/VideoAccuracy/std": 0.4415587782859802, "step": 516, "train_speed(iter/s)": 0.0176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/mean_length": 599.0238037109375, "completions/min_length": 379.0, "entropy/max": 0.55859375, "entropy/mean": 0.345703125, "entropy/min": 0.1552734375, "epoch": 0.517, "grad_norm": 0.9684381587992229, "kl": 0.259765625, "learning_rate": 9.619292915306101e-07, "loss": 0.0026290405075997114, "memory(GiB)": 147.2, "reward": 1.848392367362976, "reward_std": 0.20444540679454803, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5357811450958252, "rewards/EvidenceHallucination/std": 0.40236812829971313, "rewards/Evidence_Num_Record/mean": 5.523809432983398, "rewards/Evidence_Num_Record/std": 2.1890869140625, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008703470230103, "rewards/VideoAccuracy/mean": 0.6555217504501343, "rewards/VideoAccuracy/std": 0.42811861634254456, "step": 517, "train_speed(iter/s)": 0.017596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/mean_length": 570.3095092773438, "completions/min_length": 362.0, "entropy/max": 2.328125, "entropy/mean": 0.36328125, "entropy/min": 0.1572265625, "epoch": 0.518, "grad_norm": 1.0215522190583068, "kl": 0.21875, "learning_rate": 9.587584631353328e-07, "loss": 0.0022501437924802303, "memory(GiB)": 147.2, "reward": 2.1257436275482178, "reward_std": 0.17595742642879486, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3400658369064331, "rewards/EvidenceHallucination/std": 0.4058413505554199, "rewards/Evidence_Num_Record/mean": 5.285714149475098, "rewards/Evidence_Num_Record/std": 2.7075836658477783, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.9291589856147766, "rewards/VideoAccuracy/std": 0.29189977049827576, "step": 518, "train_speed(iter/s)": 0.017591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/mean_length": 593.547607421875, "completions/min_length": 334.0, "entropy/max": 0.609375, "entropy/mean": 0.330078125, "entropy/min": 0.171875, "epoch": 0.519, "grad_norm": 1.0948985117161494, "kl": 0.2314453125, "learning_rate": 9.555880500418738e-07, "loss": 0.0023919104132801294, "memory(GiB)": 147.2, "reward": 1.7303181886672974, "reward_std": 0.1799202859401703, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46469253301620483, "rewards/EvidenceHallucination/std": 0.47335711121559143, "rewards/Evidence_Num_Record/mean": 5.238095283508301, "rewards/Evidence_Num_Record/std": 2.602152109146118, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5754749178886414, "rewards/VideoAccuracy/std": 0.45630866289138794, "step": 519, "train_speed(iter/s)": 0.017581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 911.0, "completions/mean_length": 551.857177734375, "completions/min_length": 362.0, "entropy/max": 0.4921875, "entropy/mean": 0.3671875, "entropy/min": 0.2412109375, "epoch": 0.52, "grad_norm": 1.1137900116256731, "kl": 0.255859375, "learning_rate": 9.524180841762576e-07, "loss": 0.0026009499561041594, "memory(GiB)": 147.2, "reward": 1.5844050645828247, "reward_std": 0.24579095840454102, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.298897922039032, "rewards/EvidenceHallucination/std": 0.41049379110336304, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 1.627293348312378, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.46272075176239014, "rewards/VideoAccuracy/std": 0.48835301399230957, "step": 520, "train_speed(iter/s)": 0.017582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 673.0, "completions/mean_length": 444.952392578125, "completions/min_length": 275.0, "entropy/max": 0.5, "entropy/mean": 0.298828125, "entropy/min": 0.15234375, "epoch": 0.521, "grad_norm": 1.1618956968259504, "kl": 0.2412109375, "learning_rate": 9.492485974600059e-07, "loss": 0.0024179406464099884, "memory(GiB)": 147.2, "reward": 2.2382819652557373, "reward_std": 0.11290633678436279, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5738786458969116, "rewards/EvidenceHallucination/std": 0.42459413409233093, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.8125753998756409, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9235062599182129, "rewards/VideoAccuracy/std": 0.4135523736476898, "step": 521, "train_speed(iter/s)": 0.017587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 743.5952758789062, "completions/min_length": 391.0, "entropy/max": 2.203125, "entropy/mean": 0.4375, "entropy/min": 0.1083984375, "epoch": 0.522, "grad_norm": 1.0285302054727505, "kl": 0.2060546875, "learning_rate": 9.460796218098141e-07, "loss": 0.002161647193133831, "memory(GiB)": 147.2, "reward": 1.5938557386398315, "reward_std": 0.14812681078910828, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42208966612815857, "rewards/EvidenceHallucination/std": 0.4313488006591797, "rewards/Evidence_Num_Record/mean": 6.833333492279053, "rewards/Evidence_Num_Record/std": 4.637685775756836, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430335700511932, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.4880092740058899, "rewards/VideoAccuracy/std": 0.48101186752319336, "step": 522, "train_speed(iter/s)": 0.017569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 837.0, "completions/mean_length": 577.8095092773438, "completions/min_length": 388.0, "entropy/max": 0.58203125, "entropy/mean": 0.35546875, "entropy/min": 0.20703125, "epoch": 0.523, "grad_norm": 1.1973145091114252, "kl": 0.2294921875, "learning_rate": 9.429111891372319e-07, "loss": 0.002317069796845317, "memory(GiB)": 147.2, "reward": 1.5629115104675293, "reward_std": 0.2889779508113861, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3631332814693451, "rewards/EvidenceHallucination/std": 0.4347761571407318, "rewards/Evidence_Num_Record/mean": 5.38095235824585, "rewards/Evidence_Num_Record/std": 2.0947365760803223, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4236181974411011, "rewards/VideoAccuracy/std": 0.4120829105377197, "step": 523, "train_speed(iter/s)": 0.017567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 791.0, "completions/mean_length": 549.4285888671875, "completions/min_length": 361.0, "entropy/max": 0.455078125, "entropy/mean": 0.322265625, "entropy/min": 0.1708984375, "epoch": 0.524, "grad_norm": 1.0442632944342252, "kl": 0.255859375, "learning_rate": 9.397433313483416e-07, "loss": 0.0025761870201677084, "memory(GiB)": 147.2, "reward": 2.4195916652679443, "reward_std": 0.21475231647491455, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6845204830169678, "rewards/EvidenceHallucination/std": 0.37251976132392883, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.1466256380081177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 1.120782732963562, "rewards/VideoAccuracy/std": 0.5933963060379028, "step": 524, "train_speed(iter/s)": 0.017571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1039.0, "completions/mean_length": 559.4761962890625, "completions/min_length": 319.0, "entropy/max": 0.81640625, "entropy/mean": 0.36328125, "entropy/min": 0.1337890625, "epoch": 0.525, "grad_norm": 1.1559230240005511, "kl": 0.2294921875, "learning_rate": 9.365760803434354e-07, "loss": 0.002364410785958171, "memory(GiB)": 147.2, "reward": 1.8699347972869873, "reward_std": 0.2940487563610077, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3227474093437195, "rewards/EvidenceHallucination/std": 0.3658566474914551, "rewards/Evidence_Num_Record/mean": 5.047619342803955, "rewards/Evidence_Num_Record/std": 2.398702621459961, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.7101471424102783, "rewards/VideoAccuracy/std": 0.522103488445282, "step": 525, "train_speed(iter/s)": 0.017568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 974.0, "completions/mean_length": 529.5952758789062, "completions/min_length": 375.0, "entropy/max": 0.53125, "entropy/mean": 0.33984375, "entropy/min": 0.201171875, "epoch": 0.526, "grad_norm": 1.2709692704094622, "kl": 0.263671875, "learning_rate": 9.33409468016696e-07, "loss": 0.0026475153863430023, "memory(GiB)": 147.2, "reward": 1.5840200185775757, "reward_std": 0.40640494227409363, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33685699105262756, "rewards/EvidenceHallucination/std": 0.4412052631378174, "rewards/Evidence_Num_Record/mean": 5.095238208770752, "rewards/Evidence_Num_Record/std": 1.7080801725387573, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4499819576740265, "rewards/VideoAccuracy/std": 0.42338553071022034, "step": 526, "train_speed(iter/s)": 0.017559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 900.0, "completions/mean_length": 569.547607421875, "completions/min_length": 411.0, "entropy/max": 0.546875, "entropy/mean": 0.37109375, "entropy/min": 0.216796875, "epoch": 0.527, "grad_norm": 0.9895986600311217, "kl": 0.255859375, "learning_rate": 9.302435262558747e-07, "loss": 0.0026058589573949575, "memory(GiB)": 147.2, "reward": 1.4728721380233765, "reward_std": 0.16476371884346008, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21637868881225586, "rewards/EvidenceHallucination/std": 0.3982798755168915, "rewards/Evidence_Num_Record/mean": 5.428571701049805, "rewards/Evidence_Num_Record/std": 1.6400901079177856, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.33912017941474915, "rewards/VideoAccuracy/std": 0.46005764603614807, "step": 527, "train_speed(iter/s)": 0.017555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 797.0, "completions/mean_length": 532.0714111328125, "completions/min_length": 312.0, "entropy/max": 1.46875, "entropy/mean": 0.328125, "entropy/min": 0.150390625, "epoch": 0.528, "grad_norm": 0.8471361905347333, "kl": 0.2314453125, "learning_rate": 9.270782869419693e-07, "loss": 0.002341092098504305, "memory(GiB)": 147.2, "reward": 1.8492215871810913, "reward_std": 0.05589429661631584, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20401684939861298, "rewards/EvidenceHallucination/std": 0.35366642475128174, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.1906039714813232, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6750848889350891, "rewards/VideoAccuracy/std": 0.5081431269645691, "step": 528, "train_speed(iter/s)": 0.017557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 987.0, "completions/mean_length": 577.2380981445312, "completions/min_length": 364.0, "entropy/max": 0.9375, "entropy/mean": 0.396484375, "entropy/min": 0.2177734375, "epoch": 0.529, "grad_norm": 1.2653717911738396, "kl": 0.23046875, "learning_rate": 9.239137819489047e-07, "loss": 0.0023404674138873816, "memory(GiB)": 147.2, "reward": 1.7768093347549438, "reward_std": 0.20981763303279877, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5229452848434448, "rewards/EvidenceHallucination/std": 0.42321765422821045, "rewards/Evidence_Num_Record/mean": 5.238095283508301, "rewards/Evidence_Num_Record/std": 1.6646909713745117, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.5817440152168274, "rewards/VideoAccuracy/std": 0.3769208788871765, "step": 529, "train_speed(iter/s)": 0.017545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 789.0, "completions/mean_length": 527.6428833007812, "completions/min_length": 379.0, "entropy/max": 0.58984375, "entropy/mean": 0.396484375, "entropy/min": 0.287109375, "epoch": 0.53, "grad_norm": 1.0116197589195777, "kl": 0.251953125, "learning_rate": 9.207500431432113e-07, "loss": 0.002533841645345092, "memory(GiB)": 147.2, "reward": 1.3694162368774414, "reward_std": 0.17520248889923096, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16798247396945953, "rewards/EvidenceHallucination/std": 0.3317740559577942, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.3460484743118286, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.2739149332046509, "rewards/VideoAccuracy/std": 0.46860837936401367, "step": 530, "train_speed(iter/s)": 0.017547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/mean_length": 531.8095092773438, "completions/min_length": 383.0, "entropy/max": 0.3671875, "entropy/mean": 0.263671875, "entropy/min": 0.10693359375, "epoch": 0.531, "grad_norm": 1.0389044122120064, "kl": 0.21875, "learning_rate": 9.17587102383704e-07, "loss": 0.002218902111053467, "memory(GiB)": 147.2, "reward": 2.226511001586914, "reward_std": 0.09947007894515991, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5426416993141174, "rewards/EvidenceHallucination/std": 0.4090539813041687, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 0.4068232774734497, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9179825782775879, "rewards/VideoAccuracy/std": 0.4307158589363098, "step": 531, "train_speed(iter/s)": 0.017547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/mean_length": 521.6428833007812, "completions/min_length": 342.0, "entropy/max": 0.94140625, "entropy/mean": 0.396484375, "entropy/min": 0.173828125, "epoch": 0.532, "grad_norm": 1.23100170161689, "kl": 0.2490234375, "learning_rate": 9.144249915211605e-07, "loss": 0.002525723772123456, "memory(GiB)": 147.2, "reward": 1.67011559009552, "reward_std": 0.3325250446796417, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4056369364261627, "rewards/EvidenceHallucination/std": 0.41158750653266907, "rewards/Evidence_Num_Record/mean": 4.833333492279053, "rewards/Evidence_Num_Record/std": 1.98674476146698, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5223214626312256, "rewards/VideoAccuracy/std": 0.44740840792655945, "step": 532, "train_speed(iter/s)": 0.017547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/mean_length": 576.547607421875, "completions/min_length": 433.0, "entropy/max": 0.828125, "entropy/mean": 0.365234375, "entropy/min": 0.162109375, "epoch": 0.533, "grad_norm": 1.1137092359902618, "kl": 0.2294921875, "learning_rate": 9.11263742398002e-07, "loss": 0.002338199643418193, "memory(GiB)": 147.2, "reward": 1.3284426927566528, "reward_std": 0.2385426014661789, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13656195998191833, "rewards/EvidenceHallucination/std": 0.3124164640903473, "rewards/Evidence_Num_Record/mean": 5.333333492279053, "rewards/Evidence_Num_Record/std": 2.204946517944336, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.2392255663871765, "rewards/VideoAccuracy/std": 0.34489506483078003, "step": 533, "train_speed(iter/s)": 0.017546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 637.0, "completions/mean_length": 488.16668701171875, "completions/min_length": 305.0, "entropy/max": 0.41796875, "entropy/mean": 0.28125, "entropy/min": 0.18359375, "epoch": 0.534, "grad_norm": 1.1890010499188732, "kl": 0.263671875, "learning_rate": 9.081033868479726e-07, "loss": 0.002650885609909892, "memory(GiB)": 147.2, "reward": 2.124683141708374, "reward_std": 0.21604083478450775, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5279164910316467, "rewards/EvidenceHallucination/std": 0.4119936227798462, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.8323455452919006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.8571950793266296, "rewards/VideoAccuracy/std": 0.5043355822563171, "step": 534, "train_speed(iter/s)": 0.017549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 662.0, "completions/mean_length": 451.3809509277344, "completions/min_length": 283.0, "entropy/max": 0.91015625, "entropy/mean": 0.40234375, "entropy/min": 0.177734375, "epoch": 0.535, "grad_norm": 1.1734850505828, "kl": 0.255859375, "learning_rate": 9.049439566958175e-07, "loss": 0.0025756550021469593, "memory(GiB)": 147.2, "reward": 1.7241538763046265, "reward_std": 0.05795424431562424, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3884280025959015, "rewards/EvidenceHallucination/std": 0.4080182909965515, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 1.0809296369552612, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5512299537658691, "rewards/VideoAccuracy/std": 0.3939768075942993, "step": 535, "train_speed(iter/s)": 0.017561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 719.0, "completions/mean_length": 475.16668701171875, "completions/min_length": 270.0, "entropy/max": 0.70703125, "entropy/mean": 0.3515625, "entropy/min": 0.2001953125, "epoch": 0.536, "grad_norm": 1.3036157570263955, "kl": 0.25390625, "learning_rate": 9.017854837569628e-07, "loss": 0.002558166394010186, "memory(GiB)": 147.2, "reward": 1.823333978652954, "reward_std": 0.2030932903289795, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5309664011001587, "rewards/EvidenceHallucination/std": 0.4735872149467468, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.9093654155731201, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6171407103538513, "rewards/VideoAccuracy/std": 0.4215722680091858, "step": 536, "train_speed(iter/s)": 0.017565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 761.0, "completions/mean_length": 501.0, "completions/min_length": 324.0, "entropy/max": 0.73046875, "entropy/mean": 0.36328125, "entropy/min": 0.205078125, "epoch": 0.537, "grad_norm": 1.1426002973461102, "kl": 0.255859375, "learning_rate": 8.986279998371967e-07, "loss": 0.0025861049070954323, "memory(GiB)": 147.2, "reward": 1.8708044290542603, "reward_std": 0.1367248147726059, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4680973291397095, "rewards/EvidenceHallucination/std": 0.4764857888221741, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.214507818222046, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6771848201751709, "rewards/VideoAccuracy/std": 0.5465537309646606, "step": 537, "train_speed(iter/s)": 0.017568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1004.0, "completions/mean_length": 559.357177734375, "completions/min_length": 355.0, "entropy/max": 0.8515625, "entropy/mean": 0.375, "entropy/min": 0.119140625, "epoch": 0.538, "grad_norm": 0.8859204160097884, "kl": 0.2109375, "learning_rate": 8.954715367323466e-07, "loss": 0.002143596298992634, "memory(GiB)": 147.2, "reward": 1.7805614471435547, "reward_std": 0.1892300844192505, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16119354963302612, "rewards/EvidenceHallucination/std": 0.3293311595916748, "rewards/Evidence_Num_Record/mean": 4.833333492279053, "rewards/Evidence_Num_Record/std": 1.9116672277450562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6197511553764343, "rewards/VideoAccuracy/std": 0.45194223523139954, "step": 538, "train_speed(iter/s)": 0.017545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 777.0, "completions/mean_length": 518.5952758789062, "completions/min_length": 267.0, "entropy/max": 0.68359375, "entropy/mean": 0.369140625, "entropy/min": 0.2060546875, "epoch": 0.539, "grad_norm": 1.2069866276978913, "kl": 0.2451171875, "learning_rate": 8.923161262279611e-07, "loss": 0.0024995713029056787, "memory(GiB)": 147.2, "reward": 1.718224287033081, "reward_std": 0.14783141016960144, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43049150705337524, "rewards/EvidenceHallucination/std": 0.4441774785518646, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.0581248998641968, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5368879437446594, "rewards/VideoAccuracy/std": 0.394218385219574, "step": 539, "train_speed(iter/s)": 0.017549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/mean_length": 514.2142944335938, "completions/min_length": 371.0, "entropy/max": 0.765625, "entropy/mean": 0.359375, "entropy/min": 0.173828125, "epoch": 0.54, "grad_norm": 1.15138653953421, "kl": 0.255859375, "learning_rate": 8.89161800098989e-07, "loss": 0.002595373895019293, "memory(GiB)": 147.2, "reward": 1.496476650238037, "reward_std": 0.1784413456916809, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2719321548938751, "rewards/EvidenceHallucination/std": 0.39456066489219666, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.9624820947647095, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.38018539547920227, "rewards/VideoAccuracy/std": 0.45438462495803833, "step": 540, "train_speed(iter/s)": 0.017548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 743.0, "completions/mean_length": 475.5, "completions/min_length": 332.0, "entropy/max": 0.62890625, "entropy/mean": 0.2890625, "entropy/min": 0.11279296875, "epoch": 0.541, "grad_norm": 1.0596125947616801, "kl": 0.2392578125, "learning_rate": 8.860085901094594e-07, "loss": 0.0024353486951440573, "memory(GiB)": 147.2, "reward": 2.333366870880127, "reward_std": 0.11621489375829697, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3714437484741211, "rewards/EvidenceHallucination/std": 0.44054171442985535, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.8540400862693787, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0590778589248657, "rewards/VideoAccuracy/std": 0.42696964740753174, "step": 541, "train_speed(iter/s)": 0.01755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 957.0, "completions/mean_length": 497.452392578125, "completions/min_length": 246.0, "entropy/max": 0.79296875, "entropy/mean": 0.38671875, "entropy/min": 0.212890625, "epoch": 0.542, "grad_norm": 0.8243397697316406, "kl": 0.259765625, "learning_rate": 8.828565280121617e-07, "loss": 0.0026443253736943007, "memory(GiB)": 147.2, "reward": 1.4261549711227417, "reward_std": 0.12304135411977768, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.208102747797966, "rewards/EvidenceHallucination/std": 0.3766601085662842, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.5311325788497925, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.3178676962852478, "rewards/VideoAccuracy/std": 0.4038638174533844, "step": 542, "train_speed(iter/s)": 0.017558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1040.0, "completions/mean_length": 543.857177734375, "completions/min_length": 364.0, "entropy/max": 0.76171875, "entropy/mean": 0.3828125, "entropy/min": 0.26171875, "epoch": 0.543, "grad_norm": 1.1700946200322362, "kl": 0.23828125, "learning_rate": 8.797056455483266e-07, "loss": 0.002405008068308234, "memory(GiB)": 147.2, "reward": 1.1764250993728638, "reward_std": 0.22184321284294128, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.09296326339244843, "rewards/EvidenceHallucination/std": 0.2726643681526184, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.2280595302581787, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.095238097012043, "rewards/HonestTime/std": 0.297101765871048, "rewards/VideoAccuracy/mean": 0.13878484070301056, "rewards/VideoAccuracy/std": 0.26740172505378723, "step": 543, "train_speed(iter/s)": 0.017556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 780.0, "completions/mean_length": 481.4761962890625, "completions/min_length": 293.0, "entropy/max": 0.60546875, "entropy/mean": 0.337890625, "entropy/min": 0.12353515625, "epoch": 0.544, "grad_norm": 1.0793033530846348, "kl": 0.279296875, "learning_rate": 8.765559744473053e-07, "loss": 0.003017617389559746, "memory(GiB)": 147.2, "reward": 1.6041492223739624, "reward_std": 0.24795274436473846, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1896679550409317, "rewards/EvidenceHallucination/std": 0.3711082339286804, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.0682299137115479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.4043109118938446, "rewards/VideoAccuracy/std": 0.47207334637641907, "step": 544, "train_speed(iter/s)": 0.01756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1575.0, "completions/mean_length": 510.16668701171875, "completions/min_length": 237.0, "entropy/max": 0.984375, "entropy/mean": 0.333984375, "entropy/min": 0.1376953125, "epoch": 0.545, "grad_norm": 1.056642920217122, "kl": 0.2490234375, "learning_rate": 8.734075464262506e-07, "loss": 0.0025731062050908804, "memory(GiB)": 147.2, "reward": 2.0171689987182617, "reward_std": 0.056373246014118195, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4805810749530792, "rewards/EvidenceHallucination/std": 0.3956851661205292, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 2.6864826679229736, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8210528492927551, "rewards/VideoAccuracy/std": 0.41951730847358704, "step": 545, "train_speed(iter/s)": 0.017557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/mean_length": 516.0714111328125, "completions/min_length": 293.0, "entropy/max": 0.828125, "entropy/mean": 0.353515625, "entropy/min": 0.11669921875, "epoch": 0.546, "grad_norm": 1.0850137151077015, "kl": 0.2451171875, "learning_rate": 8.702603931897981e-07, "loss": 0.002520657144486904, "memory(GiB)": 147.2, "reward": 1.4326279163360596, "reward_std": 0.28107914328575134, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32118892669677734, "rewards/EvidenceHallucination/std": 0.4417652189731598, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.9871830940246582, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.31600916385650635, "rewards/VideoAccuracy/std": 0.4143351912498474, "step": 546, "train_speed(iter/s)": 0.017552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/mean_length": 524.7619018554688, "completions/min_length": 341.0, "entropy/max": 0.578125, "entropy/mean": 0.337890625, "entropy/min": 0.21484375, "epoch": 0.547, "grad_norm": 0.9859721647461079, "kl": 0.267578125, "learning_rate": 8.671145464297459e-07, "loss": 0.0027126925997436047, "memory(GiB)": 147.2, "reward": 1.842204213142395, "reward_std": 0.20456859469413757, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4958741366863251, "rewards/EvidenceHallucination/std": 0.45373860001564026, "rewards/Evidence_Num_Record/mean": 5.047619342803955, "rewards/Evidence_Num_Record/std": 3.863224744796753, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6430291533470154, "rewards/VideoAccuracy/std": 0.5219913721084595, "step": 547, "train_speed(iter/s)": 0.01755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1603.0, "completions/mean_length": 568.952392578125, "completions/min_length": 288.0, "entropy/max": 2.515625, "entropy/mean": 0.376953125, "entropy/min": 0.12353515625, "epoch": 0.548, "grad_norm": 0.869066911510286, "kl": 0.2109375, "learning_rate": 8.63970037824736e-07, "loss": 0.002374354749917984, "memory(GiB)": 147.2, "reward": 1.8578296899795532, "reward_std": 0.1573992371559143, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35328730940818787, "rewards/EvidenceHallucination/std": 0.4390169084072113, "rewards/Evidence_Num_Record/mean": 5.095238208770752, "rewards/Evidence_Num_Record/std": 3.4486443996429443, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6538389325141907, "rewards/VideoAccuracy/std": 0.44841691851615906, "step": 548, "train_speed(iter/s)": 0.017548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/mean_length": 544.2857055664062, "completions/min_length": 341.0, "entropy/max": 1.65625, "entropy/mean": 0.439453125, "entropy/min": 0.158203125, "epoch": 0.549, "grad_norm": 1.2103727378080085, "kl": 0.251953125, "learning_rate": 8.608268990399348e-07, "loss": 0.0025931699201464653, "memory(GiB)": 147.2, "reward": 1.9039450883865356, "reward_std": 0.19438162446022034, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6002238392829895, "rewards/EvidenceHallucination/std": 0.36550626158714294, "rewards/Evidence_Num_Record/mean": 5.0714287757873535, "rewards/Evidence_Num_Record/std": 2.588839530944824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6839002966880798, "rewards/VideoAccuracy/std": 0.3481704890727997, "step": 549, "train_speed(iter/s)": 0.017541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1041.0, "completions/mean_length": 562.0952758789062, "completions/min_length": 294.0, "entropy/max": 0.625, "entropy/mean": 0.365234375, "entropy/min": 0.20703125, "epoch": 0.55, "grad_norm": 1.231787019614606, "kl": 0.23046875, "learning_rate": 8.576851617267149e-07, "loss": 0.002344725653529167, "memory(GiB)": 147.2, "reward": 1.6168367862701416, "reward_std": 0.34122419357299805, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4344601631164551, "rewards/EvidenceHallucination/std": 0.4488111436367035, "rewards/Evidence_Num_Record/mean": 5.333333492279053, "rewards/Evidence_Num_Record/std": 1.9084748029708862, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5013732314109802, "rewards/VideoAccuracy/std": 0.5075318217277527, "step": 550, "train_speed(iter/s)": 0.01754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/mean_length": 571.3095092773438, "completions/min_length": 337.0, "entropy/max": 0.470703125, "entropy/mean": 0.265625, "entropy/min": 0.1435546875, "epoch": 0.551, "grad_norm": 1.0605074789592959, "kl": 0.21875, "learning_rate": 8.545448575223368e-07, "loss": 0.002243774477392435, "memory(GiB)": 147.2, "reward": 2.0291800498962402, "reward_std": 0.2675192952156067, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45944949984550476, "rewards/EvidenceHallucination/std": 0.4359951615333557, "rewards/Evidence_Num_Record/mean": 5.023809432983398, "rewards/Evidence_Num_Record/std": 2.727350950241089, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430335700511932, "rewards/VideoAccuracy/mean": 0.7420520186424255, "rewards/VideoAccuracy/std": 0.48766934871673584, "step": 551, "train_speed(iter/s)": 0.017536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/mean_length": 530.7142944335938, "completions/min_length": 252.0, "entropy/max": 1.96875, "entropy/mean": 0.42578125, "entropy/min": 0.2216796875, "epoch": 0.552, "grad_norm": 1.008962727340414, "kl": 0.2470703125, "learning_rate": 8.514060180496284e-07, "loss": 0.002557076746597886, "memory(GiB)": 147.2, "reward": 1.6508671045303345, "reward_std": 0.08526341617107391, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34803465008735657, "rewards/EvidenceHallucination/std": 0.3888440430164337, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 2.1068992614746094, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5193555355072021, "rewards/VideoAccuracy/std": 0.46005895733833313, "step": 552, "train_speed(iter/s)": 0.017541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 995.0, "completions/mean_length": 500.0238037109375, "completions/min_length": 344.0, "entropy/max": 0.95703125, "entropy/mean": 0.421875, "entropy/min": 0.259765625, "epoch": 0.553, "grad_norm": 0.842227210358372, "kl": 0.2412109375, "learning_rate": 8.482686749166684e-07, "loss": 0.0024455091916024685, "memory(GiB)": 147.2, "reward": 1.3672181367874146, "reward_std": 0.14556831121444702, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23915958404541016, "rewards/EvidenceHallucination/std": 0.4084899425506592, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 1.3733822107315063, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.29081475734710693, "rewards/VideoAccuracy/std": 0.4380667805671692, "step": 553, "train_speed(iter/s)": 0.017541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 613.0, "completions/mean_length": 448.0714416503906, "completions/min_length": 311.0, "entropy/max": 0.447265625, "entropy/mean": 0.298828125, "entropy/min": 0.158203125, "epoch": 0.554, "grad_norm": 1.087517160201009, "kl": 0.267578125, "learning_rate": 8.451328597164677e-07, "loss": 0.002687928732484579, "memory(GiB)": 147.2, "reward": 2.3258044719696045, "reward_std": 0.27705249190330505, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5598920583724976, "rewards/EvidenceHallucination/std": 0.3996395468711853, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.9877296090126038, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 1.0519213676452637, "rewards/VideoAccuracy/std": 0.659984290599823, "step": 554, "train_speed(iter/s)": 0.017541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 997.0, "completions/mean_length": 534.3333129882812, "completions/min_length": 327.0, "entropy/max": 1.015625, "entropy/mean": 0.34375, "entropy/min": 0.13671875, "epoch": 0.555, "grad_norm": 1.1474073643300726, "kl": 0.2158203125, "learning_rate": 8.4199860402665e-07, "loss": 0.0022617948707193136, "memory(GiB)": 147.2, "reward": 2.108491897583008, "reward_std": 0.09929686784744263, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6211246848106384, "rewards/EvidenceHallucination/std": 0.3694995939731598, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.591936707496643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8842668533325195, "rewards/VideoAccuracy/std": 0.2836246192455292, "step": 555, "train_speed(iter/s)": 0.017544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 811.0, "completions/mean_length": 513.9761962890625, "completions/min_length": 389.0, "entropy/max": 0.64453125, "entropy/mean": 0.341796875, "entropy/min": 0.2021484375, "epoch": 0.556, "grad_norm": 1.0451420644515945, "kl": 0.23828125, "learning_rate": 8.38865939409136e-07, "loss": 0.0024297989439219236, "memory(GiB)": 147.2, "reward": 1.6017160415649414, "reward_std": 0.12775146961212158, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3997710645198822, "rewards/EvidenceHallucination/std": 0.4567578434944153, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.6100728511810303, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.46938085556030273, "rewards/VideoAccuracy/std": 0.4314016103744507, "step": 556, "train_speed(iter/s)": 0.017521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/mean_length": 478.952392578125, "completions/min_length": 311.0, "entropy/max": 0.83203125, "entropy/mean": 0.3671875, "entropy/min": 0.220703125, "epoch": 0.557, "grad_norm": 1.2076152837928047, "kl": 0.25390625, "learning_rate": 8.357348974098231e-07, "loss": 0.0025751078501343727, "memory(GiB)": 147.2, "reward": 1.8883492946624756, "reward_std": 0.2357943058013916, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45542046427726746, "rewards/EvidenceHallucination/std": 0.46642157435417175, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.2733304500579834, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4047619104385376, "rewards/HonestTime/std": 0.49679577350616455, "rewards/VideoAccuracy/mean": 0.7163127660751343, "rewards/VideoAccuracy/std": 0.5752731561660767, "step": 557, "train_speed(iter/s)": 0.017524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/mean_length": 505.8571472167969, "completions/min_length": 305.0, "entropy/max": 0.51953125, "entropy/mean": 0.283203125, "entropy/min": 0.1416015625, "epoch": 0.558, "grad_norm": 1.2037434616808447, "kl": 0.2314453125, "learning_rate": 8.326055095582693e-07, "loss": 0.0023501659743487835, "memory(GiB)": 147.2, "reward": 2.298994302749634, "reward_std": 0.18438777327537537, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5537318587303162, "rewards/EvidenceHallucination/std": 0.39360833168029785, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 0.8621610999107361, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 1.0644384622573853, "rewards/VideoAccuracy/std": 0.3866475224494934, "step": 558, "train_speed(iter/s)": 0.017522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/mean_length": 485.0952453613281, "completions/min_length": 281.0, "entropy/max": 0.609375, "entropy/mean": 0.337890625, "entropy/min": 0.1396484375, "epoch": 0.559, "grad_norm": 1.1129566054635687, "kl": 0.2451171875, "learning_rate": 8.294778073673761e-07, "loss": 0.002496413653716445, "memory(GiB)": 147.2, "reward": 1.7798714637756348, "reward_std": 0.1880035102367401, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5607653260231018, "rewards/EvidenceHallucination/std": 0.3877881169319153, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.6562974452972412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5724801421165466, "rewards/VideoAccuracy/std": 0.3908865749835968, "step": 559, "train_speed(iter/s)": 0.017521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 702.0, "completions/mean_length": 447.73809814453125, "completions/min_length": 306.0, "entropy/max": 0.5390625, "entropy/mean": 0.36328125, "entropy/min": 0.26171875, "epoch": 0.56, "grad_norm": 1.239147157526934, "kl": 0.26953125, "learning_rate": 8.263518223330696e-07, "loss": 0.0027397829107940197, "memory(GiB)": 147.2, "reward": 1.7501899003982544, "reward_std": 0.3330119252204895, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46977293491363525, "rewards/EvidenceHallucination/std": 0.4220462441444397, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.9578818678855896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6229020357131958, "rewards/VideoAccuracy/std": 0.5011475086212158, "step": 560, "train_speed(iter/s)": 0.017519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 807.0, "completions/mean_length": 441.4761962890625, "completions/min_length": 301.0, "entropy/max": 0.384765625, "entropy/mean": 0.259765625, "entropy/min": 0.1240234375, "epoch": 0.561, "grad_norm": 1.0567308055382663, "kl": 0.2578125, "learning_rate": 8.232275859339841e-07, "loss": 0.002620976883918047, "memory(GiB)": 147.2, "reward": 2.082119941711426, "reward_std": 0.10417261719703674, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3060951232910156, "rewards/EvidenceHallucination/std": 0.4310819208621979, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 1.2261664867401123, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8209007978439331, "rewards/VideoAccuracy/std": 0.5067620873451233, "step": 561, "train_speed(iter/s)": 0.017523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 703.0, "completions/mean_length": 437.23809814453125, "completions/min_length": 217.0, "entropy/max": 2.328125, "entropy/mean": 0.609375, "entropy/min": 0.23828125, "epoch": 0.562, "grad_norm": 1.2524507605021182, "kl": 0.267578125, "learning_rate": 8.201051296311461e-07, "loss": 0.0027312892489135265, "memory(GiB)": 147.2, "reward": 1.9048120975494385, "reward_std": 0.0746854692697525, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6275681257247925, "rewards/EvidenceHallucination/std": 0.39228275418281555, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.3050869703292847, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108052015304565, "rewards/VideoAccuracy/mean": 0.7316792011260986, "rewards/VideoAccuracy/std": 0.38063371181488037, "step": 562, "train_speed(iter/s)": 0.017531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023809523809523808, "completions/max_length": 2625.0, "completions/mean_length": 519.1904907226562, "completions/min_length": 308.0, "entropy/max": 0.8046875, "entropy/mean": 0.392578125, "entropy/min": 0.115234375, "epoch": 0.563, "grad_norm": 0.7578925296379078, "kl": 0.2373046875, "learning_rate": 8.169844848676552e-07, "loss": 0.0024781660176813602, "memory(GiB)": 147.2, "reward": 1.2615805864334106, "reward_std": 0.09937258064746857, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.17860430479049683, "rewards/EvidenceHallucination/std": 0.37374719977378845, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 4.929938793182373, "rewards/Format/mean": 0.9761905074119568, "rewards/Format/std": 0.15430334210395813, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.20919305086135864, "rewards/VideoAccuracy/std": 0.366926908493042, "step": 563, "train_speed(iter/s)": 0.017504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 673.0, "completions/mean_length": 447.4285888671875, "completions/min_length": 303.0, "entropy/max": 0.53125, "entropy/mean": 0.310546875, "entropy/min": 0.19140625, "epoch": 0.564, "grad_norm": 1.0846174037438228, "kl": 0.27734375, "learning_rate": 8.138656830683699e-07, "loss": 0.002800673944875598, "memory(GiB)": 147.2, "reward": 1.9805023670196533, "reward_std": 0.16426292061805725, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4681243896484375, "rewards/EvidenceHallucination/std": 0.47596821188926697, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 1.2195180654525757, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.7249726057052612, "rewards/VideoAccuracy/std": 0.5356833338737488, "step": 564, "train_speed(iter/s)": 0.017512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 828.0, "completions/mean_length": 538.2380981445312, "completions/min_length": 323.0, "entropy/max": 1.25, "entropy/mean": 0.41015625, "entropy/min": 0.1416015625, "epoch": 0.565, "grad_norm": 1.1956637226126732, "kl": 0.2216796875, "learning_rate": 8.107487556395901e-07, "loss": 0.0022581107914447784, "memory(GiB)": 147.2, "reward": 1.9260531663894653, "reward_std": 0.3004651665687561, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5718371868133545, "rewards/EvidenceHallucination/std": 0.40739184617996216, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.0178431272506714, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.711685836315155, "rewards/VideoAccuracy/std": 0.3880319893360138, "step": 565, "train_speed(iter/s)": 0.017513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/mean_length": 433.73809814453125, "completions/min_length": 310.0, "entropy/max": 0.6953125, "entropy/mean": 0.408203125, "entropy/min": 0.25390625, "epoch": 0.566, "grad_norm": 1.172736829803333, "kl": 0.259765625, "learning_rate": 8.076337339687394e-07, "loss": 0.00262894737534225, "memory(GiB)": 147.2, "reward": 1.4021013975143433, "reward_std": 0.22173522412776947, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2598130404949188, "rewards/EvidenceHallucination/std": 0.41785454750061035, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.2977578043937683, "rewards/VideoAccuracy/std": 0.3597749173641205, "step": 566, "train_speed(iter/s)": 0.017516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 671.0, "completions/mean_length": 426.1190490722656, "completions/min_length": 299.0, "entropy/max": 0.7734375, "entropy/mean": 0.345703125, "entropy/min": 0.193359375, "epoch": 0.567, "grad_norm": 1.0379878114000836, "kl": 0.294921875, "learning_rate": 8.045206494240519e-07, "loss": 0.0031821592710912228, "memory(GiB)": 147.2, "reward": 1.4732636213302612, "reward_std": 0.13507957756519318, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19460420310497284, "rewards/EvidenceHallucination/std": 0.37920790910720825, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 1.315722942352295, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.33434268832206726, "rewards/VideoAccuracy/std": 0.5319502949714661, "step": 567, "train_speed(iter/s)": 0.017525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/mean_length": 573.2857055664062, "completions/min_length": 375.0, "entropy/max": 1.3828125, "entropy/mean": 0.306640625, "entropy/min": 0.11572265625, "epoch": 0.568, "grad_norm": 0.8919847704200217, "kl": 0.21484375, "learning_rate": 8.014095333542547e-07, "loss": 0.0021857237443327904, "memory(GiB)": 147.2, "reward": 1.85391366481781, "reward_std": 0.18633460998535156, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3625771403312683, "rewards/EvidenceHallucination/std": 0.4368312656879425, "rewards/Evidence_Num_Record/mean": 5.0714287757873535, "rewards/Evidence_Num_Record/std": 2.2781341075897217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5952380895614624, "rewards/HonestTime/std": 0.49679574370384216, "rewards/VideoAccuracy/mean": 0.6623504161834717, "rewards/VideoAccuracy/std": 0.6112027168273926, "step": 568, "train_speed(iter/s)": 0.017524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1031.0, "completions/mean_length": 495.3571472167969, "completions/min_length": 301.0, "entropy/max": 0.69921875, "entropy/mean": 0.40234375, "entropy/min": 0.212890625, "epoch": 0.569, "grad_norm": 1.210078635891529, "kl": 0.23828125, "learning_rate": 7.983004170882517e-07, "loss": 0.002446995582431555, "memory(GiB)": 147.2, "reward": 1.491422176361084, "reward_std": 0.3153664767742157, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28596773743629456, "rewards/EvidenceHallucination/std": 0.39698514342308044, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 2.1299262046813965, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.3485143184661865, "rewards/VideoAccuracy/std": 0.3958366811275482, "step": 569, "train_speed(iter/s)": 0.017519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/mean_length": 507.452392578125, "completions/min_length": 335.0, "entropy/max": 0.640625, "entropy/mean": 0.384765625, "entropy/min": 0.16015625, "epoch": 0.57, "grad_norm": 1.2012402312166044, "kl": 0.255859375, "learning_rate": 7.951933319348095e-07, "loss": 0.0026038773357868195, "memory(GiB)": 147.2, "reward": 1.5208826065063477, "reward_std": 0.28261804580688477, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3382541537284851, "rewards/EvidenceHallucination/std": 0.4244920015335083, "rewards/Evidence_Num_Record/mean": 5.047619342803955, "rewards/Evidence_Num_Record/std": 3.3998019695281982, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.41989845037460327, "rewards/VideoAccuracy/std": 0.5120235085487366, "step": 570, "train_speed(iter/s)": 0.017519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 754.0, "completions/mean_length": 476.5476379394531, "completions/min_length": 322.0, "entropy/max": 0.482421875, "entropy/mean": 0.26171875, "entropy/min": 0.09326171875, "epoch": 0.571, "grad_norm": 1.083601154480782, "kl": 0.2421875, "learning_rate": 7.920883091822408e-07, "loss": 0.002464497461915016, "memory(GiB)": 147.2, "reward": 1.979665756225586, "reward_std": 0.27502089738845825, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24763883650302887, "rewards/EvidenceHallucination/std": 0.3845669627189636, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 1.0348178148269653, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9285714626312256, "rewards/HonestTime/std": 0.26066118478775024, "rewards/VideoAccuracy/mean": 0.7444235682487488, "rewards/VideoAccuracy/std": 0.5135056972503662, "step": 571, "train_speed(iter/s)": 0.017523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1556.0, "completions/mean_length": 513.1190795898438, "completions/min_length": 332.0, "entropy/max": 1.0390625, "entropy/mean": 0.40625, "entropy/min": 0.1865234375, "epoch": 0.572, "grad_norm": 1.3195737662730367, "kl": 0.24609375, "learning_rate": 7.889853800980903e-07, "loss": 0.00253634387627244, "memory(GiB)": 147.2, "reward": 1.7044157981872559, "reward_std": 0.27610307931900024, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.436484158039093, "rewards/EvidenceHallucination/std": 0.42803844809532166, "rewards/Evidence_Num_Record/mean": 5.523809432983398, "rewards/Evidence_Num_Record/std": 3.240281105041504, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5552142262458801, "rewards/VideoAccuracy/std": 0.4598389267921448, "step": 572, "train_speed(iter/s)": 0.017519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/mean_length": 519.8809814453125, "completions/min_length": 308.0, "entropy/max": 0.59375, "entropy/mean": 0.369140625, "entropy/min": 0.150390625, "epoch": 0.573, "grad_norm": 1.2196867198222467, "kl": 0.2421875, "learning_rate": 7.858845759288197e-07, "loss": 0.0024828468449413776, "memory(GiB)": 147.2, "reward": 1.397599697113037, "reward_std": 0.20538488030433655, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28373873233795166, "rewards/EvidenceHallucination/std": 0.40919020771980286, "rewards/Evidence_Num_Record/mean": 5.38095235824585, "rewards/Evidence_Num_Record/std": 3.4987967014312744, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.3170424997806549, "rewards/VideoAccuracy/std": 0.4093017280101776, "step": 573, "train_speed(iter/s)": 0.017515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 459.90478515625, "completions/min_length": 307.0, "entropy/max": 0.4765625, "entropy/mean": 0.2890625, "entropy/min": 0.169921875, "epoch": 0.574, "grad_norm": 1.2462310660679652, "kl": 0.259765625, "learning_rate": 7.827859278994924e-07, "loss": 0.002635692711919546, "memory(GiB)": 147.2, "reward": 2.3556113243103027, "reward_std": 0.18688201904296875, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6333656311035156, "rewards/EvidenceHallucination/std": 0.39550983905792236, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.3582885265350342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 1.067033290863037, "rewards/VideoAccuracy/std": 0.3786657154560089, "step": 574, "train_speed(iter/s)": 0.017512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1172.0, "completions/mean_length": 519.6666870117188, "completions/min_length": 345.0, "entropy/max": 1.546875, "entropy/mean": 0.3828125, "entropy/min": 0.1376953125, "epoch": 0.575, "grad_norm": 1.1636934037261435, "kl": 0.2255859375, "learning_rate": 7.796894672134593e-07, "loss": 0.0023174858652055264, "memory(GiB)": 147.2, "reward": 1.8038508892059326, "reward_std": 0.2878847122192383, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4917738735675812, "rewards/EvidenceHallucination/std": 0.4435805678367615, "rewards/Evidence_Num_Record/mean": 5.095238208770752, "rewards/Evidence_Num_Record/std": 2.6208314895629883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.6150199174880981, "rewards/VideoAccuracy/std": 0.4046759307384491, "step": 575, "train_speed(iter/s)": 0.017518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 874.0, "completions/mean_length": 485.8333435058594, "completions/min_length": 345.0, "entropy/max": 0.70703125, "entropy/mean": 0.37109375, "entropy/min": 0.1767578125, "epoch": 0.576, "grad_norm": 1.1660534427614697, "kl": 0.22265625, "learning_rate": 7.765952250520458e-07, "loss": 0.0022863138001412153, "memory(GiB)": 147.2, "reward": 1.7745193243026733, "reward_std": 0.2418813556432724, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4411916732788086, "rewards/EvidenceHallucination/std": 0.4348190426826477, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.9134889841079712, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.6005666255950928, "rewards/VideoAccuracy/std": 0.38164791464805603, "step": 576, "train_speed(iter/s)": 0.017513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1328.0, "completions/mean_length": 528.40478515625, "completions/min_length": 303.0, "entropy/max": 0.625, "entropy/mean": 0.337890625, "entropy/min": 0.13671875, "epoch": 0.577, "grad_norm": 1.2322061073052224, "kl": 0.255859375, "learning_rate": 7.735032325742355e-07, "loss": 0.0026552907656878233, "memory(GiB)": 147.2, "reward": 1.8820176124572754, "reward_std": 0.17488104104995728, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5127783417701721, "rewards/EvidenceHallucination/std": 0.42155125737190247, "rewards/Evidence_Num_Record/mean": 5.309524059295654, "rewards/Evidence_Num_Record/std": 3.1583268642425537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6794617772102356, "rewards/VideoAccuracy/std": 0.5325960516929626, "step": 577, "train_speed(iter/s)": 0.017511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/mean_length": 612.1428833007812, "completions/min_length": 353.0, "entropy/max": 0.5546875, "entropy/mean": 0.287109375, "entropy/min": 0.1103515625, "epoch": 0.578, "grad_norm": 1.0005613736355368, "kl": 0.2109375, "learning_rate": 7.704135209163588e-07, "loss": 0.0021892141085118055, "memory(GiB)": 147.2, "reward": 1.8001298904418945, "reward_std": 0.3935781419277191, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3724662661552429, "rewards/EvidenceHallucination/std": 0.42378610372543335, "rewards/Evidence_Num_Record/mean": 5.238095283508301, "rewards/Evidence_Num_Record/std": 2.535691738128662, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.597065269947052, "rewards/VideoAccuracy/std": 0.5219387412071228, "step": 578, "train_speed(iter/s)": 0.017508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/mean_length": 473.3809509277344, "completions/min_length": 320.0, "entropy/max": 0.62109375, "entropy/mean": 0.369140625, "entropy/min": 0.1962890625, "epoch": 0.579, "grad_norm": 1.2303466431057068, "kl": 0.2470703125, "learning_rate": 7.673261211917775e-07, "loss": 0.0025184685364365578, "memory(GiB)": 147.2, "reward": 1.8147884607315063, "reward_std": 0.2195645570755005, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5988144278526306, "rewards/EvidenceHallucination/std": 0.42997074127197266, "rewards/Evidence_Num_Record/mean": 4.833333492279053, "rewards/Evidence_Num_Record/std": 2.4881834983825684, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.6045495271682739, "rewards/VideoAccuracy/std": 0.4016510546207428, "step": 579, "train_speed(iter/s)": 0.017508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1126.0, "completions/mean_length": 472.8571472167969, "completions/min_length": 324.0, "entropy/max": 0.51953125, "entropy/mean": 0.375, "entropy/min": 0.173828125, "epoch": 0.58, "grad_norm": 1.3407291707619684, "kl": 0.255859375, "learning_rate": 7.642410644905726e-07, "loss": 0.0025993494782596827, "memory(GiB)": 147.2, "reward": 1.6960052251815796, "reward_std": 0.3615559935569763, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44124239683151245, "rewards/EvidenceHallucination/std": 0.4154648780822754, "rewards/Evidence_Num_Record/mean": 4.785714149475098, "rewards/Evidence_Num_Record/std": 2.3429782390594482, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.54585200548172, "rewards/VideoAccuracy/std": 0.43818435072898865, "step": 580, "train_speed(iter/s)": 0.017505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 765.0, "completions/mean_length": 453.1190490722656, "completions/min_length": 324.0, "entropy/max": 0.48046875, "entropy/mean": 0.255859375, "entropy/min": 0.125, "epoch": 0.581, "grad_norm": 1.0561630407134095, "kl": 0.259765625, "learning_rate": 7.61158381879231e-07, "loss": 0.0026355660520493984, "memory(GiB)": 147.2, "reward": 2.223803758621216, "reward_std": 0.10234322398900986, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46720772981643677, "rewards/EvidenceHallucination/std": 0.4397595524787903, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.7054623365402222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9303621649742126, "rewards/VideoAccuracy/std": 0.45081934332847595, "step": 581, "train_speed(iter/s)": 0.017508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 874.0, "completions/mean_length": 523.7380981445312, "completions/min_length": 324.0, "entropy/max": 0.5546875, "entropy/mean": 0.38671875, "entropy/min": 0.23828125, "epoch": 0.582, "grad_norm": 1.0339141819625919, "kl": 0.251953125, "learning_rate": 7.580781044003324e-07, "loss": 0.0025591151788830757, "memory(GiB)": 147.2, "reward": 1.577696681022644, "reward_std": 0.1125200018286705, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32422274351119995, "rewards/EvidenceHallucination/std": 0.41044461727142334, "rewards/Evidence_Num_Record/mean": 5.357142925262451, "rewards/Evidence_Num_Record/std": 2.034116744995117, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.446185439825058, "rewards/VideoAccuracy/std": 0.45348039269447327, "step": 582, "train_speed(iter/s)": 0.01751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1058.0, "completions/mean_length": 488.5714416503906, "completions/min_length": 312.0, "entropy/max": 0.91796875, "entropy/mean": 0.435546875, "entropy/min": 0.2890625, "epoch": 0.583, "grad_norm": 1.282970983136212, "kl": 0.263671875, "learning_rate": 7.550002630722365e-07, "loss": 0.002655723597854376, "memory(GiB)": 147.2, "reward": 1.706022024154663, "reward_std": 0.245122492313385, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3257083594799042, "rewards/EvidenceHallucination/std": 0.4306149482727051, "rewards/Evidence_Num_Record/mean": 4.928571701049805, "rewards/Evidence_Num_Record/std": 2.0047850608825684, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5742137432098389, "rewards/VideoAccuracy/std": 0.38398995995521545, "step": 583, "train_speed(iter/s)": 0.017507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 717.0, "completions/mean_length": 452.21429443359375, "completions/min_length": 300.0, "entropy/max": 0.53125, "entropy/mean": 0.333984375, "entropy/min": 0.16796875, "epoch": 0.584, "grad_norm": 1.1753810847670432, "kl": 0.26953125, "learning_rate": 7.519248888887715e-07, "loss": 0.0027270063292235136, "memory(GiB)": 147.2, "reward": 2.1703648567199707, "reward_std": 0.13654127717018127, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44237229228019714, "rewards/EvidenceHallucination/std": 0.4528614282608032, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.2230842113494873, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9152234792709351, "rewards/VideoAccuracy/std": 0.557100772857666, "step": 584, "train_speed(iter/s)": 0.017513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1760.0, "completions/mean_length": 486.8809509277344, "completions/min_length": 280.0, "entropy/max": 1.4765625, "entropy/mean": 0.388671875, "entropy/min": 0.1376953125, "epoch": 0.585, "grad_norm": 1.1676599210226963, "kl": 0.2451171875, "learning_rate": 7.488520128189208e-07, "loss": 0.00256509892642498, "memory(GiB)": 147.2, "reward": 1.6237132549285889, "reward_std": 0.21546559035778046, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14745573699474335, "rewards/EvidenceHallucination/std": 0.31591618061065674, "rewards/Evidence_Num_Record/mean": 4.952381134033203, "rewards/Evidence_Num_Record/std": 3.6018450260162354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.49898406863212585, "rewards/VideoAccuracy/std": 0.48285984992980957, "step": 585, "train_speed(iter/s)": 0.01751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 577.0, "completions/mean_length": 446.69049072265625, "completions/min_length": 331.0, "entropy/max": 1.0, "entropy/mean": 0.400390625, "entropy/min": 0.25390625, "epoch": 0.586, "grad_norm": 1.1332462195699249, "kl": 0.265625, "learning_rate": 7.457816658065132e-07, "loss": 0.002694082912057638, "memory(GiB)": 147.2, "reward": 1.4800655841827393, "reward_std": 0.29056552052497864, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.256216824054718, "rewards/EvidenceHallucination/std": 0.3800258934497833, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.8499504923820496, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.32882219552993774, "rewards/VideoAccuracy/std": 0.38793131709098816, "step": 586, "train_speed(iter/s)": 0.017512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 646.0, "completions/mean_length": 456.16668701171875, "completions/min_length": 346.0, "entropy/max": 0.7265625, "entropy/mean": 0.3828125, "entropy/min": 0.2392578125, "epoch": 0.587, "grad_norm": 1.1743006768939335, "kl": 0.283203125, "learning_rate": 7.427138787699085e-07, "loss": 0.0028411007951945066, "memory(GiB)": 147.2, "reward": 1.962119221687317, "reward_std": 0.27418652176856995, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5741019248962402, "rewards/EvidenceHallucination/std": 0.4399320185184479, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 0.7908447980880737, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.7520606517791748, "rewards/VideoAccuracy/std": 0.5276792645454407, "step": 587, "train_speed(iter/s)": 0.017511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 949.0, "completions/mean_length": 500.3571472167969, "completions/min_length": 327.0, "entropy/max": 1.4765625, "entropy/mean": 0.345703125, "entropy/min": 0.15625, "epoch": 0.588, "grad_norm": 0.9097728252200424, "kl": 0.2294921875, "learning_rate": 7.396486826016879e-07, "loss": 0.002346982713788748, "memory(GiB)": 147.2, "reward": 1.7889320850372314, "reward_std": 0.1560111939907074, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14849649369716644, "rewards/EvidenceHallucination/std": 0.2909368872642517, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.9500142335891724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.6306614279747009, "rewards/VideoAccuracy/std": 0.48010721802711487, "step": 588, "train_speed(iter/s)": 0.017511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 920.0, "completions/mean_length": 504.8571472167969, "completions/min_length": 275.0, "entropy/max": 0.6796875, "entropy/mean": 0.396484375, "entropy/min": 0.2275390625, "epoch": 0.589, "grad_norm": 1.3897835790727282, "kl": 0.2578125, "learning_rate": 7.365861081683433e-07, "loss": 0.0026217829436063766, "memory(GiB)": 147.2, "reward": 1.7859326601028442, "reward_std": 0.30997252464294434, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5363885164260864, "rewards/EvidenceHallucination/std": 0.4576357901096344, "rewards/Evidence_Num_Record/mean": 5.190476417541504, "rewards/Evidence_Num_Record/std": 1.811051368713379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.588178813457489, "rewards/VideoAccuracy/std": 0.37847551703453064, "step": 589, "train_speed(iter/s)": 0.017506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 825.0, "completions/mean_length": 453.4761962890625, "completions/min_length": 264.0, "entropy/max": 0.5703125, "entropy/mean": 0.380859375, "entropy/min": 0.2177734375, "epoch": 0.59, "grad_norm": 1.3803185320621216, "kl": 0.267578125, "learning_rate": 7.335261863099651e-07, "loss": 0.002693876624107361, "memory(GiB)": 147.2, "reward": 1.5962520837783813, "reward_std": 0.34639328718185425, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3232976496219635, "rewards/EvidenceHallucination/std": 0.44262251257896423, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.5833408832550049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.46968793869018555, "rewards/VideoAccuracy/std": 0.4826664924621582, "step": 590, "train_speed(iter/s)": 0.017508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 805.0, "completions/mean_length": 466.1190490722656, "completions/min_length": 305.0, "entropy/max": 0.458984375, "entropy/mean": 0.294921875, "entropy/min": 0.140625, "epoch": 0.591, "grad_norm": 1.1045732780872357, "kl": 0.2431640625, "learning_rate": 7.304689478399322e-07, "loss": 0.002461612457409501, "memory(GiB)": 147.2, "reward": 2.038516044616699, "reward_std": 0.10969673097133636, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5281686782836914, "rewards/EvidenceHallucination/std": 0.4227769374847412, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.5436787009239197, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7328824400901794, "rewards/VideoAccuracy/std": 0.47139081358909607, "step": 591, "train_speed(iter/s)": 0.017516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1060.0, "completions/mean_length": 469.452392578125, "completions/min_length": 244.0, "entropy/max": 1.046875, "entropy/mean": 0.44140625, "entropy/min": 0.2353515625, "epoch": 0.592, "grad_norm": 1.180903660171026, "kl": 0.275390625, "learning_rate": 7.274144235446023e-07, "loss": 0.0028012755792587996, "memory(GiB)": 147.2, "reward": 1.4012137651443481, "reward_std": 0.2524511516094208, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.25063470005989075, "rewards/EvidenceHallucination/std": 0.36757490038871765, "rewards/Evidence_Num_Record/mean": 5.452381134033203, "rewards/Evidence_Num_Record/std": 2.777982711791992, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.32251545786857605, "rewards/VideoAccuracy/std": 0.44714128971099854, "step": 592, "train_speed(iter/s)": 0.017481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/mean_length": 447.7857360839844, "completions/min_length": 307.0, "entropy/max": 0.5703125, "entropy/mean": 0.408203125, "entropy/min": 0.28125, "epoch": 0.593, "grad_norm": 1.3056742391318676, "kl": 0.279296875, "learning_rate": 7.243626441830009e-07, "loss": 0.002849259879440069, "memory(GiB)": 147.2, "reward": 1.611133098602295, "reward_std": 0.12857817113399506, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.374485045671463, "rewards/EvidenceHallucination/std": 0.42864927649497986, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.065780758857727, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.48385506868362427, "rewards/VideoAccuracy/std": 0.4231003522872925, "step": 593, "train_speed(iter/s)": 0.017483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 682.0, "completions/mean_length": 463.4761962890625, "completions/min_length": 279.0, "entropy/max": 0.65625, "entropy/mean": 0.310546875, "entropy/min": 0.130859375, "epoch": 0.594, "grad_norm": 1.1866145801590529, "kl": 0.265625, "learning_rate": 7.213136404865123e-07, "loss": 0.0026950924657285213, "memory(GiB)": 147.2, "reward": 2.351012945175171, "reward_std": 0.2195076197385788, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6800523400306702, "rewards/EvidenceHallucination/std": 0.3688565790653229, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.051518440246582, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 1.0483359098434448, "rewards/VideoAccuracy/std": 0.4914064407348633, "step": 594, "train_speed(iter/s)": 0.017488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 798.0, "completions/mean_length": 502.6428527832031, "completions/min_length": 330.0, "entropy/max": 0.8984375, "entropy/mean": 0.41015625, "entropy/min": 0.1435546875, "epoch": 0.595, "grad_norm": 1.0244119066397843, "kl": 0.2451171875, "learning_rate": 7.182674431585702e-07, "loss": 0.0024832345079630613, "memory(GiB)": 147.2, "reward": 1.6552233695983887, "reward_std": 0.21051955223083496, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44998759031295776, "rewards/EvidenceHallucination/std": 0.46245652437210083, "rewards/Evidence_Num_Record/mean": 4.785714149475098, "rewards/Evidence_Num_Record/std": 1.0249501466751099, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.4985591471195221, "rewards/VideoAccuracy/std": 0.4166490137577057, "step": 595, "train_speed(iter/s)": 0.017487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 635.0, "completions/mean_length": 437.7857360839844, "completions/min_length": 281.0, "entropy/max": 0.71484375, "entropy/mean": 0.416015625, "entropy/min": 0.259765625, "epoch": 0.596, "grad_norm": 1.2185757668145605, "kl": 0.287109375, "learning_rate": 7.152240828743477e-07, "loss": 0.0029028202407062054, "memory(GiB)": 147.2, "reward": 1.6317411661148071, "reward_std": 0.16950979828834534, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39286577701568604, "rewards/EvidenceHallucination/std": 0.4430376887321472, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.2916690111160278, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.4626917243003845, "rewards/VideoAccuracy/std": 0.37667131423950195, "step": 596, "train_speed(iter/s)": 0.017497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 611.0, "completions/mean_length": 428.71429443359375, "completions/min_length": 320.0, "entropy/max": 0.671875, "entropy/mean": 0.412109375, "entropy/min": 0.2431640625, "epoch": 0.597, "grad_norm": 1.38619725092822, "kl": 0.28515625, "learning_rate": 7.121835902804489e-07, "loss": 0.002899776678532362, "memory(GiB)": 147.2, "reward": 1.8062628507614136, "reward_std": 0.14738309383392334, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5170978307723999, "rewards/EvidenceHallucination/std": 0.47136178612709045, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.0339757204055786, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6028432250022888, "rewards/VideoAccuracy/std": 0.5212418437004089, "step": 597, "train_speed(iter/s)": 0.017495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 739.0, "completions/mean_length": 442.76190185546875, "completions/min_length": 287.0, "entropy/max": 0.77734375, "entropy/mean": 0.33203125, "entropy/min": 0.11279296875, "epoch": 0.598, "grad_norm": 1.255813910891952, "kl": 0.255859375, "learning_rate": 7.091459959946009e-07, "loss": 0.0025937955360859632, "memory(GiB)": 147.2, "reward": 1.9070651531219482, "reward_std": 0.27369827032089233, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49443018436431885, "rewards/EvidenceHallucination/std": 0.42579492926597595, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.859932780265808, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074110031128, "rewards/VideoAccuracy/mean": 0.6843696236610413, "rewards/VideoAccuracy/std": 0.4975552558898926, "step": 598, "train_speed(iter/s)": 0.017496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/mean_length": 460.16668701171875, "completions/min_length": 266.0, "entropy/max": 2.078125, "entropy/mean": 0.5390625, "entropy/min": 0.1728515625, "epoch": 0.599, "grad_norm": 1.222683216704578, "kl": 0.283203125, "learning_rate": 7.061113306053442e-07, "loss": 0.0029310905374586582, "memory(GiB)": 147.2, "reward": 1.7134166955947876, "reward_std": 0.2816122770309448, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4839991629123688, "rewards/EvidenceHallucination/std": 0.459764689207077, "rewards/Evidence_Num_Record/mean": 4.976190567016602, "rewards/Evidence_Num_Record/std": 2.4343886375427246, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5499501824378967, "rewards/VideoAccuracy/std": 0.44302186369895935, "step": 599, "train_speed(iter/s)": 0.017486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 579.0, "completions/mean_length": 437.5476379394531, "completions/min_length": 295.0, "entropy/max": 0.5625, "entropy/mean": 0.4140625, "entropy/min": 0.2197265625, "epoch": 0.6, "grad_norm": 1.1777310155517764, "kl": 0.263671875, "learning_rate": 7.030796246717255e-07, "loss": 0.002670376095920801, "memory(GiB)": 147.2, "reward": 1.5067311525344849, "reward_std": 0.24075977504253387, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2628157436847687, "rewards/EvidenceHallucination/std": 0.3982740640640259, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.0314452648162842, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.3922632336616516, "rewards/VideoAccuracy/std": 0.4786408841609955, "step": 600, "train_speed(iter/s)": 0.017489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 733.0, "completions/mean_length": 490.3333435058594, "completions/min_length": 305.0, "entropy/max": 0.5859375, "entropy/mean": 0.259765625, "entropy/min": 0.11865234375, "epoch": 0.601, "grad_norm": 1.0637205709825024, "kl": 0.23828125, "learning_rate": 7.000509087229894e-07, "loss": 0.002422991441562772, "memory(GiB)": 147.2, "reward": 2.0810062885284424, "reward_std": 0.13841350376605988, "rewards/EvidenceFormat/mean": 0.9523809552192688, "rewards/EvidenceFormat/std": 0.21554027497768402, "rewards/EvidenceHallucination/mean": 0.33854150772094727, "rewards/EvidenceHallucination/std": 0.4618864953517914, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.134661316871643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8809524178504944, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.8609171509742737, "rewards/VideoAccuracy/std": 0.4651089906692505, "step": 601, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 748.0, "completions/mean_length": 488.9761962890625, "completions/min_length": 307.0, "entropy/max": 1.015625, "entropy/mean": 0.53125, "entropy/min": 0.30859375, "epoch": 0.602, "grad_norm": 0.9703941666790854, "kl": 0.28515625, "learning_rate": 6.970252132582728e-07, "loss": 0.0028867856599390507, "memory(GiB)": 147.2, "reward": 1.5431602001190186, "reward_std": 0.09946687519550323, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37068021297454834, "rewards/EvidenceHallucination/std": 0.40233659744262695, "rewards/Evidence_Num_Record/mean": 5.0714287757873535, "rewards/Evidence_Num_Record/std": 1.5522265434265137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.4404527544975281, "rewards/VideoAccuracy/std": 0.4748918116092682, "step": 602, "train_speed(iter/s)": 0.017466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 673.0, "completions/mean_length": 407.8333435058594, "completions/min_length": 288.0, "entropy/max": 0.54296875, "entropy/mean": 0.39453125, "entropy/min": 0.25390625, "epoch": 0.603, "grad_norm": 1.2840891486656645, "kl": 0.291015625, "learning_rate": 6.940025687462952e-07, "loss": 0.0029300376772880554, "memory(GiB)": 147.2, "reward": 1.7167476415634155, "reward_std": 0.24148140847682953, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.536012589931488, "rewards/EvidenceHallucination/std": 0.4745959937572479, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7213357090950012, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.5476404428482056, "rewards/VideoAccuracy/std": 0.4502407908439636, "step": 603, "train_speed(iter/s)": 0.017468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 597.0, "completions/mean_length": 410.4047546386719, "completions/min_length": 282.0, "entropy/max": 0.51953125, "entropy/mean": 0.341796875, "entropy/min": 0.1318359375, "epoch": 0.604, "grad_norm": 1.3496509722491907, "kl": 0.3046875, "learning_rate": 6.909830056250526e-07, "loss": 0.0030762115493416786, "memory(GiB)": 147.2, "reward": 2.4725546836853027, "reward_std": 0.2338365763425827, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7553823590278625, "rewards/EvidenceHallucination/std": 0.3370148837566376, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.8406761288642883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 1.1548113822937012, "rewards/VideoAccuracy/std": 0.5124393701553345, "step": 604, "train_speed(iter/s)": 0.017471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1046.0, "completions/mean_length": 491.21429443359375, "completions/min_length": 301.0, "entropy/max": 1.4375, "entropy/mean": 0.34375, "entropy/min": 0.1318359375, "epoch": 0.605, "grad_norm": 1.0787786489605784, "kl": 0.2333984375, "learning_rate": 6.87966554301513e-07, "loss": 0.0023751643020659685, "memory(GiB)": 147.2, "reward": 1.8977985382080078, "reward_std": 0.2284865379333496, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4612298905849457, "rewards/EvidenceHallucination/std": 0.41398024559020996, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 1.6028430461883545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7388858199119568, "rewards/VideoAccuracy/std": 0.4245954751968384, "step": 605, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/mean_length": 459.9285888671875, "completions/min_length": 323.0, "entropy/max": 0.70703125, "entropy/mean": 0.439453125, "entropy/min": 0.310546875, "epoch": 0.606, "grad_norm": 1.2847746652791245, "kl": 0.271484375, "learning_rate": 6.849532451513073e-07, "loss": 0.0027352008037269115, "memory(GiB)": 147.2, "reward": 1.4930458068847656, "reward_std": 0.1793939769268036, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21473932266235352, "rewards/EvidenceHallucination/std": 0.36765056848526, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 0.8873874545097351, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.3500978350639343, "rewards/VideoAccuracy/std": 0.41116583347320557, "step": 606, "train_speed(iter/s)": 0.017463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/mean_length": 421.3095397949219, "completions/min_length": 280.0, "entropy/max": 0.5859375, "entropy/mean": 0.412109375, "entropy/min": 0.251953125, "epoch": 0.607, "grad_norm": 1.2772980962615592, "kl": 0.2890625, "learning_rate": 6.819431085184251e-07, "loss": 0.0029103453271090984, "memory(GiB)": 147.2, "reward": 1.8208121061325073, "reward_std": 0.14132027328014374, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48530933260917664, "rewards/EvidenceHallucination/std": 0.4333818256855011, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.8025076389312744, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.6285120844841003, "rewards/VideoAccuracy/std": 0.532863438129425, "step": 607, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1294.0, "completions/mean_length": 563.0952758789062, "completions/min_length": 319.0, "entropy/max": 1.9296875, "entropy/mean": 0.330078125, "entropy/min": 0.126953125, "epoch": 0.608, "grad_norm": 0.982542051536595, "kl": 0.2275390625, "learning_rate": 6.789361747149092e-07, "loss": 0.0023674024268984795, "memory(GiB)": 147.2, "reward": 1.9754973649978638, "reward_std": 0.06728312373161316, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37929102778434753, "rewards/EvidenceHallucination/std": 0.4013298451900482, "rewards/Evidence_Num_Record/mean": 5.11904764175415, "rewards/Evidence_Num_Record/std": 2.8982954025268555, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.7758294939994812, "rewards/VideoAccuracy/std": 0.46641021966934204, "step": 608, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 780.0, "completions/mean_length": 469.0952453613281, "completions/min_length": 280.0, "entropy/max": 0.953125, "entropy/mean": 0.4140625, "entropy/min": 0.2001953125, "epoch": 0.609, "grad_norm": 1.3475060514297332, "kl": 0.271484375, "learning_rate": 6.759324740205495e-07, "loss": 0.0027493652887642384, "memory(GiB)": 147.2, "reward": 1.913028597831726, "reward_std": 0.2779674530029297, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6431964635848999, "rewards/EvidenceHallucination/std": 0.40461066365242004, "rewards/Evidence_Num_Record/mean": 4.928571701049805, "rewards/Evidence_Num_Record/std": 1.9049724340438843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.717722475528717, "rewards/VideoAccuracy/std": 0.37390825152397156, "step": 609, "train_speed(iter/s)": 0.017459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1650.0, "completions/mean_length": 467.1190490722656, "completions/min_length": 344.0, "entropy/max": 0.6796875, "entropy/mean": 0.419921875, "entropy/min": 0.2216796875, "epoch": 0.61, "grad_norm": 1.2935326073326132, "kl": 0.283203125, "learning_rate": 6.729320366825783e-07, "loss": 0.002889070427045226, "memory(GiB)": 147.2, "reward": 1.356255292892456, "reward_std": 0.21071168780326843, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13856059312820435, "rewards/EvidenceHallucination/std": 0.31749221682548523, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 3.5196478366851807, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.2618764638900757, "rewards/VideoAccuracy/std": 0.27847543358802795, "step": 610, "train_speed(iter/s)": 0.017455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 825.0, "completions/mean_length": 515.6190795898438, "completions/min_length": 291.0, "entropy/max": 0.5859375, "entropy/mean": 0.326171875, "entropy/min": 0.130859375, "epoch": 0.611, "grad_norm": 1.2379947419241533, "kl": 0.2333984375, "learning_rate": 6.699348929153668e-07, "loss": 0.002381596015766263, "memory(GiB)": 147.2, "reward": 1.9828771352767944, "reward_std": 0.19084368646144867, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43019285798072815, "rewards/EvidenceHallucination/std": 0.4468642771244049, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.292343258857727, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9285714626312256, "rewards/HonestTime/std": 0.26066118478775024, "rewards/VideoAccuracy/mean": 0.7111242413520813, "rewards/VideoAccuracy/std": 0.34175026416778564, "step": 611, "train_speed(iter/s)": 0.017455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1598.0, "completions/mean_length": 503.5952453613281, "completions/min_length": 346.0, "entropy/max": 1.5546875, "entropy/mean": 0.484375, "entropy/min": 0.1650390625, "epoch": 0.612, "grad_norm": 1.2842849121027213, "kl": 0.283203125, "learning_rate": 6.669410729001193e-07, "loss": 0.002932178322225809, "memory(GiB)": 147.2, "reward": 1.6253175735473633, "reward_std": 0.32313433289527893, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46873438358306885, "rewards/EvidenceHallucination/std": 0.4804971516132355, "rewards/Evidence_Num_Record/mean": 5.333333492279053, "rewards/Evidence_Num_Record/std": 3.159705400466919, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.4982372522354126, "rewards/VideoAccuracy/std": 0.4757577180862427, "step": 612, "train_speed(iter/s)": 0.01745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/mean_length": 454.952392578125, "completions/min_length": 294.0, "entropy/max": 0.66796875, "entropy/mean": 0.44140625, "entropy/min": 0.255859375, "epoch": 0.613, "grad_norm": 1.3884812004983706, "kl": 0.283203125, "learning_rate": 6.639506067845698e-07, "loss": 0.002851371420547366, "memory(GiB)": 147.2, "reward": 1.5651453733444214, "reward_std": 0.34722527861595154, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40665119886398315, "rewards/EvidenceHallucination/std": 0.46054762601852417, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.383493185043335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.4314342439174652, "rewards/VideoAccuracy/std": 0.4196312427520752, "step": 613, "train_speed(iter/s)": 0.017448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 598.0, "completions/mean_length": 421.4285888671875, "completions/min_length": 335.0, "entropy/max": 0.64453125, "entropy/mean": 0.369140625, "entropy/min": 0.119140625, "epoch": 0.614, "grad_norm": 1.3214791874769356, "kl": 0.294921875, "learning_rate": 6.609635246826793e-07, "loss": 0.002968190936371684, "memory(GiB)": 147.2, "reward": 2.231680154800415, "reward_std": 0.18747715651988983, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6672221422195435, "rewards/EvidenceHallucination/std": 0.39013221859931946, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.8968262076377869, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9315690994262695, "rewards/VideoAccuracy/std": 0.45265018939971924, "step": 614, "train_speed(iter/s)": 0.01745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 777.0, "completions/mean_length": 510.71429443359375, "completions/min_length": 353.0, "entropy/max": 1.7109375, "entropy/mean": 0.46875, "entropy/min": 0.1318359375, "epoch": 0.615, "grad_norm": 1.275982360305517, "kl": 0.251953125, "learning_rate": 6.579798566743313e-07, "loss": 0.0025584520772099495, "memory(GiB)": 147.2, "reward": 1.7848589420318604, "reward_std": 0.32896852493286133, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33377861976623535, "rewards/EvidenceHallucination/std": 0.38658350706100464, "rewards/Evidence_Num_Record/mean": 5.190476417541504, "rewards/Evidence_Num_Record/std": 1.4523000717163086, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.6561984419822693, "rewards/VideoAccuracy/std": 0.45942410826683044, "step": 615, "train_speed(iter/s)": 0.017444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 628.0, "completions/mean_length": 447.5714416503906, "completions/min_length": 317.0, "entropy/max": 0.7265625, "entropy/mean": 0.4375, "entropy/min": 0.283203125, "epoch": 0.616, "grad_norm": 1.1098779874054865, "kl": 0.279296875, "learning_rate": 6.549996328050296e-07, "loss": 0.0028115222230553627, "memory(GiB)": 147.2, "reward": 1.5975712537765503, "reward_std": 0.13794991374015808, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37659451365470886, "rewards/EvidenceHallucination/std": 0.4494171142578125, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.137727975845337, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.42225220799446106, "rewards/VideoAccuracy/std": 0.38261863589286804, "step": 616, "train_speed(iter/s)": 0.017446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 701.0, "completions/mean_length": 444.21429443359375, "completions/min_length": 310.0, "entropy/max": 0.56640625, "entropy/mean": 0.4140625, "entropy/min": 0.248046875, "epoch": 0.617, "grad_norm": 1.3243024689622669, "kl": 0.298828125, "learning_rate": 6.52022883085595e-07, "loss": 0.0030175037682056427, "memory(GiB)": 147.2, "reward": 1.6268917322158813, "reward_std": 0.39017921686172485, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.266214519739151, "rewards/EvidenceHallucination/std": 0.4012203812599182, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 1.0820035934448242, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.47841063141822815, "rewards/VideoAccuracy/std": 0.5621667504310608, "step": 617, "train_speed(iter/s)": 0.017444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1220.0, "completions/mean_length": 506.6428527832031, "completions/min_length": 321.0, "entropy/max": 2.234375, "entropy/mean": 0.384765625, "entropy/min": 0.12890625, "epoch": 0.618, "grad_norm": 1.0431495374899582, "kl": 0.2373046875, "learning_rate": 6.490496374918646e-07, "loss": 0.002435671165585518, "memory(GiB)": 147.2, "reward": 1.9094020128250122, "reward_std": 0.16423994302749634, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3320138156414032, "rewards/EvidenceHallucination/std": 0.43036186695098877, "rewards/Evidence_Num_Record/mean": 4.904761791229248, "rewards/Evidence_Num_Record/std": 2.8353991508483887, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7096660137176514, "rewards/VideoAccuracy/std": 0.5061018466949463, "step": 618, "train_speed(iter/s)": 0.017446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/mean_length": 495.8571472167969, "completions/min_length": 318.0, "entropy/max": 0.70703125, "entropy/mean": 0.40625, "entropy/min": 0.2421875, "epoch": 0.619, "grad_norm": 0.8913717456101545, "kl": 0.27734375, "learning_rate": 6.460799259643883e-07, "loss": 0.0028232461772859097, "memory(GiB)": 147.2, "reward": 1.402701497077942, "reward_std": 0.1259581446647644, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24587325751781464, "rewards/EvidenceHallucination/std": 0.39733564853668213, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 1.4979660511016846, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.29162219166755676, "rewards/VideoAccuracy/std": 0.3946917951107025, "step": 619, "train_speed(iter/s)": 0.017446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 644.0, "completions/mean_length": 408.73809814453125, "completions/min_length": 268.0, "entropy/max": 0.6328125, "entropy/mean": 0.419921875, "entropy/min": 0.25390625, "epoch": 0.62, "grad_norm": 1.4400826272655762, "kl": 0.294921875, "learning_rate": 6.431137784081281e-07, "loss": 0.002960315439850092, "memory(GiB)": 147.2, "reward": 1.6624799966812134, "reward_std": 0.2637644112110138, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30846282839775085, "rewards/EvidenceHallucination/std": 0.3980914354324341, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.0215448141098022, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5388826727867126, "rewards/VideoAccuracy/std": 0.5325735211372375, "step": 620, "train_speed(iter/s)": 0.01745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 834.0, "completions/mean_length": 520.90478515625, "completions/min_length": 335.0, "entropy/max": 0.54296875, "entropy/mean": 0.30859375, "entropy/min": 0.14453125, "epoch": 0.621, "grad_norm": 1.0983362750381223, "kl": 0.2451171875, "learning_rate": 6.401512246921575e-07, "loss": 0.0024788822047412395, "memory(GiB)": 147.2, "reward": 1.777593731880188, "reward_std": 0.09118415415287018, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14555399119853973, "rewards/EvidenceHallucination/std": 0.3323424160480499, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 0.8006965517997742, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9047619104385376, "rewards/HonestTime/std": 0.297101765871048, "rewards/VideoAccuracy/mean": 0.5675305128097534, "rewards/VideoAccuracy/std": 0.3709167242050171, "step": 621, "train_speed(iter/s)": 0.017452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1656.0, "completions/mean_length": 524.9285888671875, "completions/min_length": 269.0, "entropy/max": 1.28125, "entropy/mean": 0.47265625, "entropy/min": 0.1416015625, "epoch": 0.622, "grad_norm": 1.3260165857487658, "kl": 0.26953125, "learning_rate": 6.371922946493591e-07, "loss": 0.0027844994328916073, "memory(GiB)": 147.2, "reward": 1.8846853971481323, "reward_std": 0.11145021766424179, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5921638011932373, "rewards/EvidenceHallucination/std": 0.3846372067928314, "rewards/Evidence_Num_Record/mean": 6.142857074737549, "rewards/Evidence_Num_Record/std": 4.170578956604004, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7329192757606506, "rewards/VideoAccuracy/std": 0.39959076046943665, "step": 622, "train_speed(iter/s)": 0.017447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2057.0, "completions/mean_length": 524.1666870117188, "completions/min_length": 345.0, "entropy/max": 0.58203125, "entropy/mean": 0.4140625, "entropy/min": 0.1123046875, "epoch": 0.623, "grad_norm": 1.1080644567091926, "kl": 0.271484375, "learning_rate": 6.342370180761255e-07, "loss": 0.0028053666464984417, "memory(GiB)": 147.2, "reward": 1.4755423069000244, "reward_std": 0.21746203303337097, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3213748037815094, "rewards/EvidenceHallucination/std": 0.44025641679763794, "rewards/Evidence_Num_Record/mean": 5.214285850524902, "rewards/Evidence_Num_Record/std": 4.093634128570557, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.34936246275901794, "rewards/VideoAccuracy/std": 0.35842636227607727, "step": 623, "train_speed(iter/s)": 0.017442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/mean_length": 449.2857360839844, "completions/min_length": 349.0, "entropy/max": 0.81640625, "entropy/mean": 0.388671875, "entropy/min": 0.166015625, "epoch": 0.624, "grad_norm": 1.249943430402762, "kl": 0.279296875, "learning_rate": 6.312854247320594e-07, "loss": 0.002804760355502367, "memory(GiB)": 147.2, "reward": 1.978733777999878, "reward_std": 0.21570143103599548, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3235350251197815, "rewards/EvidenceHallucination/std": 0.423623651266098, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.0744633674621582, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7473601698875427, "rewards/VideoAccuracy/std": 0.5739562511444092, "step": 624, "train_speed(iter/s)": 0.017444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/mean_length": 480.69049072265625, "completions/min_length": 324.0, "entropy/max": 1.59375, "entropy/mean": 0.515625, "entropy/min": 0.12109375, "epoch": 0.625, "grad_norm": 0.7677987436654703, "kl": 0.255859375, "learning_rate": 6.283375443396726e-07, "loss": 0.0026296665892004967, "memory(GiB)": 147.2, "reward": 1.5685820579528809, "reward_std": 0.06617039442062378, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1186329573392868, "rewards/EvidenceHallucination/std": 0.27648842334747314, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 2.921743631362915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.48295068740844727, "rewards/VideoAccuracy/std": 0.5402042865753174, "step": 625, "train_speed(iter/s)": 0.017442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 677.0, "completions/mean_length": 476.1428527832031, "completions/min_length": 313.0, "entropy/max": 0.94921875, "entropy/mean": 0.49609375, "entropy/min": 0.3515625, "epoch": 0.626, "grad_norm": 1.4090372368111226, "kl": 0.265625, "learning_rate": 6.253934065840879e-07, "loss": 0.0027147873770445585, "memory(GiB)": 147.2, "reward": 1.6841174364089966, "reward_std": 0.2543501853942871, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3653638958930969, "rewards/EvidenceHallucination/std": 0.4579576551914215, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.0135550498962402, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5476190447807312, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.5015208721160889, "rewards/VideoAccuracy/std": 0.39004263281822205, "step": 626, "train_speed(iter/s)": 0.017447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 895.0, "completions/mean_length": 462.8095397949219, "completions/min_length": 345.0, "entropy/max": 0.71875, "entropy/mean": 0.421875, "entropy/min": 0.2578125, "epoch": 0.627, "grad_norm": 1.2906780037915064, "kl": 0.275390625, "learning_rate": 6.224530411127402e-07, "loss": 0.0027889562770724297, "memory(GiB)": 147.2, "reward": 1.4440864324569702, "reward_std": 0.21193701028823853, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20968188345432281, "rewards/EvidenceHallucination/std": 0.385572224855423, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.4337884187698364, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.30215004086494446, "rewards/VideoAccuracy/std": 0.45778700709342957, "step": 627, "train_speed(iter/s)": 0.017454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 806.0, "completions/mean_length": 454.3571472167969, "completions/min_length": 343.0, "entropy/max": 1.7734375, "entropy/mean": 0.435546875, "entropy/min": 0.1376953125, "epoch": 0.628, "grad_norm": 1.1677469239179248, "kl": 0.251953125, "learning_rate": 6.19516477535077e-07, "loss": 0.002543492242693901, "memory(GiB)": 147.2, "reward": 1.5812627077102661, "reward_std": 0.26526641845703125, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14464905858039856, "rewards/EvidenceHallucination/std": 0.3285929560661316, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.797533392906189, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.4285232424736023, "rewards/VideoAccuracy/std": 0.4419552981853485, "step": 628, "train_speed(iter/s)": 0.017464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 628.0, "completions/mean_length": 448.0, "completions/min_length": 267.0, "entropy/max": 1.0859375, "entropy/mean": 0.51171875, "entropy/min": 0.2890625, "epoch": 0.629, "grad_norm": 1.4038832123707443, "kl": 0.279296875, "learning_rate": 6.165837454222607e-07, "loss": 0.0028509548865258694, "memory(GiB)": 147.2, "reward": 1.949292540550232, "reward_std": 0.18202029168605804, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6369680762290955, "rewards/EvidenceHallucination/std": 0.3774639964103699, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 0.9169965386390686, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.76475590467453, "rewards/VideoAccuracy/std": 0.36503830552101135, "step": 629, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1668.0, "completions/mean_length": 486.3095397949219, "completions/min_length": 334.0, "entropy/max": 0.81640625, "entropy/mean": 0.419921875, "entropy/min": 0.1318359375, "epoch": 0.63, "grad_norm": 1.0980329531771842, "kl": 0.267578125, "learning_rate": 6.136548743068712e-07, "loss": 0.0029643033631145954, "memory(GiB)": 147.2, "reward": 1.3012635707855225, "reward_std": 0.21980595588684082, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14249320328235626, "rewards/EvidenceHallucination/std": 0.3265673518180847, "rewards/Evidence_Num_Record/mean": 5.261904716491699, "rewards/Evidence_Num_Record/std": 3.3645172119140625, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.17276491224765778, "rewards/VideoAccuracy/std": 0.3082710802555084, "step": 630, "train_speed(iter/s)": 0.017462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 739.0, "completions/mean_length": 486.3333435058594, "completions/min_length": 362.0, "entropy/max": 0.5859375, "entropy/mean": 0.3359375, "entropy/min": 0.1796875, "epoch": 0.631, "grad_norm": 1.171876469327531, "kl": 0.251953125, "learning_rate": 6.107298936826086e-07, "loss": 0.002529977587983012, "memory(GiB)": 147.2, "reward": 1.9414401054382324, "reward_std": 0.07969477772712708, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32001227140426636, "rewards/EvidenceHallucination/std": 0.43706533312797546, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.8025076985359192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6774377226829529, "rewards/VideoAccuracy/std": 0.5147029161453247, "step": 631, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/mean_length": 503.3333435058594, "completions/min_length": 383.0, "entropy/max": 1.0546875, "entropy/mean": 0.455078125, "entropy/min": 0.24609375, "epoch": 0.632, "grad_norm": 1.2356169279271694, "kl": 0.283203125, "learning_rate": 6.078088330039944e-07, "loss": 0.002893433440476656, "memory(GiB)": 147.2, "reward": 1.5653852224349976, "reward_std": 0.27202343940734863, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3666588068008423, "rewards/EvidenceHallucination/std": 0.4319332540035248, "rewards/Evidence_Num_Record/mean": 5.1666669845581055, "rewards/Evidence_Num_Record/std": 1.4468917846679688, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.46348196268081665, "rewards/VideoAccuracy/std": 0.4616753160953522, "step": 632, "train_speed(iter/s)": 0.017473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 603.0, "completions/mean_length": 472.1190490722656, "completions/min_length": 356.0, "entropy/max": 0.7890625, "entropy/mean": 0.447265625, "entropy/min": 0.287109375, "epoch": 0.633, "grad_norm": 1.4942046208513473, "kl": 0.255859375, "learning_rate": 6.04891721686078e-07, "loss": 0.002583180321380496, "memory(GiB)": 147.2, "reward": 1.4299159049987793, "reward_std": 0.4242191016674042, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430335700511932, "rewards/EvidenceHallucination/mean": 0.2303914874792099, "rewards/EvidenceHallucination/std": 0.399456262588501, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.0969284772872925, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4047619104385376, "rewards/HonestTime/std": 0.49679577350616455, "rewards/VideoAccuracy/mean": 0.3147900402545929, "rewards/VideoAccuracy/std": 0.35890620946884155, "step": 633, "train_speed(iter/s)": 0.017474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 671.0, "completions/mean_length": 444.1428527832031, "completions/min_length": 328.0, "entropy/max": 0.443359375, "entropy/mean": 0.357421875, "entropy/min": 0.1787109375, "epoch": 0.634, "grad_norm": 1.2530109926746207, "kl": 0.294921875, "learning_rate": 6.01978589104138e-07, "loss": 0.002966479165479541, "memory(GiB)": 147.2, "reward": 2.078141689300537, "reward_std": 0.14186517894268036, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4167763590812683, "rewards/EvidenceHallucination/std": 0.42956098914146423, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.6043781638145447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8281196355819702, "rewards/VideoAccuracy/std": 0.7123571038246155, "step": 634, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 753.0, "completions/mean_length": 447.19049072265625, "completions/min_length": 326.0, "entropy/max": 1.28125, "entropy/mean": 0.455078125, "entropy/min": 0.12451171875, "epoch": 0.635, "grad_norm": 1.197780254249038, "kl": 0.263671875, "learning_rate": 5.990694645933865e-07, "loss": 0.002655822318047285, "memory(GiB)": 147.2, "reward": 1.9410308599472046, "reward_std": 0.12248065322637558, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5052928328514099, "rewards/EvidenceHallucination/std": 0.4558258354663849, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.3139561414718628, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7733056545257568, "rewards/VideoAccuracy/std": 0.47066211700439453, "step": 635, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 610.0, "completions/mean_length": 411.71429443359375, "completions/min_length": 237.0, "entropy/max": 0.578125, "entropy/mean": 0.408203125, "entropy/min": 0.2421875, "epoch": 0.636, "grad_norm": 1.3596745525669296, "kl": 0.28515625, "learning_rate": 5.961643774486753e-07, "loss": 0.0028728009201586246, "memory(GiB)": 147.2, "reward": 1.8870395421981812, "reward_std": 0.24997267127037048, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5736715793609619, "rewards/EvidenceHallucination/std": 0.4410237669944763, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.1129581928253174, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.6437338590621948, "rewards/VideoAccuracy/std": 0.3046031892299652, "step": 636, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 576.0, "completions/mean_length": 419.3571472167969, "completions/min_length": 286.0, "entropy/max": 0.5703125, "entropy/mean": 0.392578125, "entropy/min": 0.25, "epoch": 0.637, "grad_norm": 1.371929928590567, "kl": 0.279296875, "learning_rate": 5.932633569241999e-07, "loss": 0.0028081792406737804, "memory(GiB)": 147.2, "reward": 2.0611584186553955, "reward_std": 0.25506219267845154, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5297446846961975, "rewards/EvidenceHallucination/std": 0.43175989389419556, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.8968262076377869, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8552095293998718, "rewards/VideoAccuracy/std": 0.6414136290550232, "step": 637, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 580.0, "completions/mean_length": 450.0476379394531, "completions/min_length": 311.0, "entropy/max": 0.5703125, "entropy/mean": 0.341796875, "entropy/min": 0.1494140625, "epoch": 0.638, "grad_norm": 1.2919471704825667, "kl": 0.25390625, "learning_rate": 5.903664322332047e-07, "loss": 0.0025535766035318375, "memory(GiB)": 147.2, "reward": 2.08666729927063, "reward_std": 0.269299179315567, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4039100110530853, "rewards/EvidenceHallucination/std": 0.458482027053833, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.9699312448501587, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.8820759057998657, "rewards/VideoAccuracy/std": 0.46471455693244934, "step": 638, "train_speed(iter/s)": 0.017486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/mean_length": 450.0238037109375, "completions/min_length": 344.0, "entropy/max": 0.71875, "entropy/mean": 0.44921875, "entropy/min": 0.310546875, "epoch": 0.639, "grad_norm": 1.3534914088870265, "kl": 0.2734375, "learning_rate": 5.874736325476889e-07, "loss": 0.002750057727098465, "memory(GiB)": 147.2, "reward": 1.7882862091064453, "reward_std": 0.10772984474897385, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49877476692199707, "rewards/EvidenceHallucination/std": 0.44282642006874084, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.7904776334762573, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.6266263127326965, "rewards/VideoAccuracy/std": 0.447672963142395, "step": 639, "train_speed(iter/s)": 0.017481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 687.0, "completions/mean_length": 423.0, "completions/min_length": 298.0, "entropy/max": 0.71875, "entropy/mean": 0.416015625, "entropy/min": 0.2890625, "epoch": 0.64, "grad_norm": 1.288198381472419, "kl": 0.29296875, "learning_rate": 5.845849869981136e-07, "loss": 0.0029476440977305174, "memory(GiB)": 147.2, "reward": 1.497715711593628, "reward_std": 0.15667381882667542, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16764487326145172, "rewards/EvidenceHallucination/std": 0.3352504074573517, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.8249872326850891, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.3641866147518158, "rewards/VideoAccuracy/std": 0.5057440996170044, "step": 640, "train_speed(iter/s)": 0.017483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 661.0, "completions/mean_length": 459.3095397949219, "completions/min_length": 283.0, "entropy/max": 0.515625, "entropy/mean": 0.296875, "entropy/min": 0.1357421875, "epoch": 0.641, "grad_norm": 1.1413185333078562, "kl": 0.255859375, "learning_rate": 5.817005246731073e-07, "loss": 0.0025705297011882067, "memory(GiB)": 147.2, "reward": 2.4359419345855713, "reward_std": 0.122245192527771, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6870543956756592, "rewards/EvidenceHallucination/std": 0.35847052931785583, "rewards/Evidence_Num_Record/mean": 3.809523820877075, "rewards/Evidence_Num_Record/std": 0.706696093082428, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9285714626312256, "rewards/HonestTime/std": 0.26066118478775024, "rewards/VideoAccuracy/mean": 1.1128168106079102, "rewards/VideoAccuracy/std": 0.4972495436668396, "step": 641, "train_speed(iter/s)": 0.017483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1661.0, "completions/mean_length": 457.952392578125, "completions/min_length": 308.0, "entropy/max": 1.5703125, "entropy/mean": 0.4921875, "entropy/min": 0.1220703125, "epoch": 0.642, "grad_norm": 1.4330868757528463, "kl": 0.291015625, "learning_rate": 5.788202746191734e-07, "loss": 0.003061532974243164, "memory(GiB)": 147.2, "reward": 1.9305227994918823, "reward_std": 0.23124870657920837, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6630523204803467, "rewards/EvidenceHallucination/std": 0.3898063004016876, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 4.09044075012207, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7645788788795471, "rewards/VideoAccuracy/std": 0.39655032753944397, "step": 642, "train_speed(iter/s)": 0.017481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 654.0, "completions/mean_length": 446.0952453613281, "completions/min_length": 252.0, "entropy/max": 0.57421875, "entropy/mean": 0.4140625, "entropy/min": 0.23828125, "epoch": 0.643, "grad_norm": 1.11793454116198, "kl": 0.2578125, "learning_rate": 5.759442658403985e-07, "loss": 0.002604592591524124, "memory(GiB)": 147.2, "reward": 1.504928469657898, "reward_std": 0.1708807647228241, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2890865206718445, "rewards/EvidenceHallucination/std": 0.41946476697921753, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.0493069887161255, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.38044440746307373, "rewards/VideoAccuracy/std": 0.41669321060180664, "step": 643, "train_speed(iter/s)": 0.017481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 684.0, "completions/mean_length": 455.40478515625, "completions/min_length": 352.0, "entropy/max": 0.48046875, "entropy/mean": 0.361328125, "entropy/min": 0.177734375, "epoch": 0.644, "grad_norm": 1.258093882493503, "kl": 0.271484375, "learning_rate": 5.730725272981583e-07, "loss": 0.0027431691996753216, "memory(GiB)": 147.2, "reward": 2.50679087638855, "reward_std": 0.1791459619998932, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8189927339553833, "rewards/EvidenceHallucination/std": 0.3137473165988922, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.6043781638145447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 1.1858493089675903, "rewards/VideoAccuracy/std": 0.4272572696208954, "step": 644, "train_speed(iter/s)": 0.017482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 779.0, "completions/mean_length": 495.66668701171875, "completions/min_length": 342.0, "entropy/max": 1.125, "entropy/mean": 0.474609375, "entropy/min": 0.1337890625, "epoch": 0.645, "grad_norm": 1.0452420162271987, "kl": 0.267578125, "learning_rate": 5.702050879108283e-07, "loss": 0.0027335789054632187, "memory(GiB)": 147.2, "reward": 1.7045737504959106, "reward_std": 0.3052411675453186, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.275814026594162, "rewards/EvidenceHallucination/std": 0.40204769372940063, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.7550326585769653, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.582744300365448, "rewards/VideoAccuracy/std": 0.5970212817192078, "step": 645, "train_speed(iter/s)": 0.017482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 772.0, "completions/mean_length": 464.0476379394531, "completions/min_length": 358.0, "entropy/max": 0.69921875, "entropy/mean": 0.44921875, "entropy/min": 0.30078125, "epoch": 0.646, "grad_norm": 1.128949782037839, "kl": 0.279296875, "learning_rate": 5.673419765534915e-07, "loss": 0.0028231206815689802, "memory(GiB)": 147.2, "reward": 1.3619728088378906, "reward_std": 0.24435554444789886, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.16221894323825836, "rewards/EvidenceHallucination/std": 0.3455430269241333, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 0.9750071167945862, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.23905286192893982, "rewards/VideoAccuracy/std": 0.28250643610954285, "step": 646, "train_speed(iter/s)": 0.01748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 749.0, "completions/mean_length": 430.0714416503906, "completions/min_length": 311.0, "entropy/max": 0.953125, "entropy/mean": 0.45703125, "entropy/min": 0.29296875, "epoch": 0.647, "grad_norm": 1.3864010257948174, "kl": 0.2890625, "learning_rate": 5.644832220576479e-07, "loss": 0.0029104407876729965, "memory(GiB)": 147.2, "reward": 2.0799288749694824, "reward_std": 0.16969692707061768, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6771750450134277, "rewards/EvidenceHallucination/std": 0.3804674744606018, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 1.0075210332870483, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.8492557406425476, "rewards/VideoAccuracy/std": 0.4122805595397949, "step": 647, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 894.0, "completions/mean_length": 508.3809509277344, "completions/min_length": 318.0, "entropy/max": 0.8984375, "entropy/mean": 0.384765625, "entropy/min": 0.158203125, "epoch": 0.648, "grad_norm": 1.0362772461187326, "kl": 0.2451171875, "learning_rate": 5.616288532109224e-07, "loss": 0.0024698299821466208, "memory(GiB)": 147.2, "reward": 1.8865734338760376, "reward_std": 0.09468773007392883, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5207627415657043, "rewards/EvidenceHallucination/std": 0.46173611283302307, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 0.8913052082061768, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6490874290466309, "rewards/VideoAccuracy/std": 0.387030690908432, "step": 648, "train_speed(iter/s)": 0.017484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 678.0, "completions/mean_length": 466.8333435058594, "completions/min_length": 344.0, "entropy/max": 1.1640625, "entropy/mean": 0.50390625, "entropy/min": 0.275390625, "epoch": 0.649, "grad_norm": 1.3311556860146998, "kl": 0.279296875, "learning_rate": 5.587788987567785e-07, "loss": 0.002829805016517639, "memory(GiB)": 147.2, "reward": 1.5995934009552002, "reward_std": 0.3568987548351288, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3610594570636749, "rewards/EvidenceHallucination/std": 0.42795902490615845, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.2540295124053955, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.4654768109321594, "rewards/VideoAccuracy/std": 0.43665531277656555, "step": 649, "train_speed(iter/s)": 0.017492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 562.0, "completions/mean_length": 437.2857360839844, "completions/min_length": 347.0, "entropy/max": 0.640625, "entropy/mean": 0.435546875, "entropy/min": 0.306640625, "epoch": 0.65, "grad_norm": 1.2377606703225692, "kl": 0.28515625, "learning_rate": 5.559333873942258e-07, "loss": 0.0028525185771286488, "memory(GiB)": 147.2, "reward": 1.6477826833724976, "reward_std": 0.21032457053661346, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3635348975658417, "rewards/EvidenceHallucination/std": 0.416638046503067, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.6625891327857971, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.479837566614151, "rewards/VideoAccuracy/std": 0.4521355926990509, "step": 650, "train_speed(iter/s)": 0.017475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 780.0, "completions/mean_length": 535.7142944335938, "completions/min_length": 384.0, "entropy/max": 0.52734375, "entropy/mean": 0.30078125, "entropy/min": 0.130859375, "epoch": 0.651, "grad_norm": 1.1056339325086264, "kl": 0.2275390625, "learning_rate": 5.530923477775322e-07, "loss": 0.0022936025634407997, "memory(GiB)": 147.2, "reward": 2.013475179672241, "reward_std": 0.3081175982952118, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33621248602867126, "rewards/EvidenceHallucination/std": 0.42277342081069946, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.7485952973365784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.7509944438934326, "rewards/VideoAccuracy/std": 0.5117350220680237, "step": 651, "train_speed(iter/s)": 0.017477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 960.0, "completions/mean_length": 490.0476379394531, "completions/min_length": 348.0, "entropy/max": 2.140625, "entropy/mean": 0.58984375, "entropy/min": 0.28125, "epoch": 0.652, "grad_norm": 0.8452958423486069, "kl": 0.271484375, "learning_rate": 5.502558085159344e-07, "loss": 0.0027937653940171003, "memory(GiB)": 147.2, "reward": 1.9492918252944946, "reward_std": 0.09397362172603607, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.0, "rewards/EvidenceHallucination/std": 0.0, "rewards/Evidence_Num_Record/mean": 5.261904716491699, "rewards/Evidence_Num_Record/std": 1.9638131856918335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.91595858335495, "rewards/VideoAccuracy/std": 0.2248149961233139, "step": 652, "train_speed(iter/s)": 0.017472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/mean_length": 472.3571472167969, "completions/min_length": 263.0, "entropy/max": 0.671875, "entropy/mean": 0.431640625, "entropy/min": 0.291015625, "epoch": 0.653, "grad_norm": 1.4113041808354707, "kl": 0.259765625, "learning_rate": 5.47423798173352e-07, "loss": 0.0026171018835157156, "memory(GiB)": 147.2, "reward": 1.7929911613464355, "reward_std": 0.17371951043605804, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4915204346179962, "rewards/EvidenceHallucination/std": 0.4581449031829834, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.5809552669525146, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.6375442147254944, "rewards/VideoAccuracy/std": 0.3973805904388428, "step": 653, "train_speed(iter/s)": 0.017473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/mean_length": 427.5238037109375, "completions/min_length": 299.0, "entropy/max": 0.56640625, "entropy/mean": 0.35546875, "entropy/min": 0.1748046875, "epoch": 0.654, "grad_norm": 1.1434917947803622, "kl": 0.26953125, "learning_rate": 5.445963452680973e-07, "loss": 0.0027125473134219646, "memory(GiB)": 147.2, "reward": 2.280250310897827, "reward_std": 0.14925819635391235, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6781247854232788, "rewards/EvidenceHallucination/std": 0.41727301478385925, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.6559829115867615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.9874823689460754, "rewards/VideoAccuracy/std": 0.49428799748420715, "step": 654, "train_speed(iter/s)": 0.017478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/mean_length": 491.7857360839844, "completions/min_length": 320.0, "entropy/max": 1.6640625, "entropy/mean": 0.484375, "entropy/min": 0.1474609375, "epoch": 0.655, "grad_norm": 1.0853257983035267, "kl": 0.25, "learning_rate": 5.417734782725896e-07, "loss": 0.002571134828031063, "memory(GiB)": 147.2, "reward": 1.7250261306762695, "reward_std": 0.27739259600639343, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20502276718616486, "rewards/EvidenceHallucination/std": 0.3402935564517975, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 2.2959086894989014, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6173549294471741, "rewards/VideoAccuracy/std": 0.5183524489402771, "step": 655, "train_speed(iter/s)": 0.017475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 594.0, "completions/mean_length": 444.0238037109375, "completions/min_length": 302.0, "entropy/max": 0.56640625, "entropy/mean": 0.41015625, "entropy/min": 0.28125, "epoch": 0.656, "grad_norm": 1.246969051558, "kl": 0.2890625, "learning_rate": 5.389552256130689e-07, "loss": 0.0029038935899734497, "memory(GiB)": 147.2, "reward": 1.62190580368042, "reward_std": 0.21904978156089783, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40573424100875854, "rewards/EvidenceHallucination/std": 0.45892637968063354, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 0.8890219330787659, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.41694924235343933, "rewards/VideoAccuracy/std": 0.3788967728614807, "step": 656, "train_speed(iter/s)": 0.017475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 530.0, "completions/mean_length": 429.9285888671875, "completions/min_length": 299.0, "entropy/max": 0.65234375, "entropy/mean": 0.43359375, "entropy/min": 0.3125, "epoch": 0.657, "grad_norm": 1.2243096462731131, "kl": 0.287109375, "learning_rate": 5.361416156693075e-07, "loss": 0.0028951808344572783, "memory(GiB)": 147.2, "reward": 1.3817565441131592, "reward_std": 0.17917168140411377, "rewards/EvidenceFormat/mean": 0.9523809552192688, "rewards/EvidenceFormat/std": 0.21554026007652283, "rewards/EvidenceHallucination/mean": 0.19264882802963257, "rewards/EvidenceHallucination/std": 0.37777388095855713, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.143582820892334, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.2717982530593872, "rewards/VideoAccuracy/std": 0.37504634261131287, "step": 657, "train_speed(iter/s)": 0.017487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 706.0, "completions/mean_length": 508.1428527832031, "completions/min_length": 322.0, "entropy/max": 1.5859375, "entropy/mean": 0.443359375, "entropy/min": 0.17578125, "epoch": 0.658, "grad_norm": 1.095017114903171, "kl": 0.2294921875, "learning_rate": 5.333326767743263e-07, "loss": 0.0023005008697509766, "memory(GiB)": 147.2, "reward": 2.0157835483551025, "reward_std": 0.2706416845321655, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6202226877212524, "rewards/EvidenceHallucination/std": 0.38593390583992004, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.2484601736068726, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.7631673216819763, "rewards/VideoAccuracy/std": 0.3701137602329254, "step": 658, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 534.0, "completions/mean_length": 445.3333435058594, "completions/min_length": 349.0, "entropy/max": 0.63671875, "entropy/mean": 0.451171875, "entropy/min": 0.29296875, "epoch": 0.659, "grad_norm": 1.3566480574860367, "kl": 0.279296875, "learning_rate": 5.305284372141095e-07, "loss": 0.002823568880558014, "memory(GiB)": 147.2, "reward": 1.701953411102295, "reward_std": 0.31828799843788147, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4799630343914032, "rewards/EvidenceHallucination/std": 0.4651428461074829, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 0.8239306807518005, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5440560579299927, "rewards/VideoAccuracy/std": 0.43683817982673645, "step": 659, "train_speed(iter/s)": 0.017487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 929.0, "completions/mean_length": 471.69049072265625, "completions/min_length": 317.0, "entropy/max": 0.6953125, "entropy/mean": 0.451171875, "entropy/min": 0.310546875, "epoch": 0.66, "grad_norm": 1.2157016101281084, "kl": 0.2421875, "learning_rate": 5.277289252273174e-07, "loss": 0.002447072882205248, "memory(GiB)": 147.2, "reward": 1.294893503189087, "reward_std": 0.20156048238277435, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.13871105015277863, "rewards/EvidenceHallucination/std": 0.3208237886428833, "rewards/Evidence_Num_Record/mean": 4.642857074737549, "rewards/Evidence_Num_Record/std": 1.935816764831543, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.20048469305038452, "rewards/VideoAccuracy/std": 0.31244516372680664, "step": 660, "train_speed(iter/s)": 0.017485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 704.0, "completions/mean_length": 496.0714416503906, "completions/min_length": 346.0, "entropy/max": 0.51171875, "entropy/mean": 0.333984375, "entropy/min": 0.1650390625, "epoch": 0.661, "grad_norm": 0.9492405219351743, "kl": 0.234375, "learning_rate": 5.249341690050051e-07, "loss": 0.002568890806287527, "memory(GiB)": 147.2, "reward": 1.9744826555252075, "reward_std": 0.18958406150341034, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35581013560295105, "rewards/EvidenceHallucination/std": 0.42360445857048035, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.6398497223854065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.7080825567245483, "rewards/VideoAccuracy/std": 0.6018710732460022, "step": 661, "train_speed(iter/s)": 0.017487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 763.0, "completions/mean_length": 495.6428527832031, "completions/min_length": 320.0, "entropy/max": 1.3125, "entropy/mean": 0.59375, "entropy/min": 0.29296875, "epoch": 0.662, "grad_norm": 1.2389370789230514, "kl": 0.287109375, "learning_rate": 5.22144196690337e-07, "loss": 0.0029177777469158173, "memory(GiB)": 147.2, "reward": 1.6555906534194946, "reward_std": 0.21660292148590088, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45820945501327515, "rewards/EvidenceHallucination/std": 0.39233002066612244, "rewards/Evidence_Num_Record/mean": 5.214285850524902, "rewards/Evidence_Num_Record/std": 1.6160130500793457, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.5401391386985779, "rewards/VideoAccuracy/std": 0.4661349058151245, "step": 662, "train_speed(iter/s)": 0.017488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 633.0, "completions/mean_length": 477.0, "completions/min_length": 274.0, "entropy/max": 0.71484375, "entropy/mean": 0.462890625, "entropy/min": 0.2734375, "epoch": 0.663, "grad_norm": 1.455002442545534, "kl": 0.25390625, "learning_rate": 5.193590363783027e-07, "loss": 0.0025626281276345253, "memory(GiB)": 147.2, "reward": 1.7300691604614258, "reward_std": 0.2832028269767761, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4435245990753174, "rewards/EvidenceHallucination/std": 0.42624273896217346, "rewards/Evidence_Num_Record/mean": 4.785714149475098, "rewards/Evidence_Num_Record/std": 1.4903874397277832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5794594287872314, "rewards/VideoAccuracy/std": 0.4444909691810608, "step": 663, "train_speed(iter/s)": 0.017489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 546.0, "completions/mean_length": 432.71429443359375, "completions/min_length": 283.0, "entropy/max": 0.53515625, "entropy/mean": 0.37109375, "entropy/min": 0.19921875, "epoch": 0.664, "grad_norm": 1.3961964678083207, "kl": 0.279296875, "learning_rate": 5.16578716115436e-07, "loss": 0.0028040807228535414, "memory(GiB)": 147.2, "reward": 2.2781004905700684, "reward_std": 0.18304118514060974, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7079420685768127, "rewards/EvidenceHallucination/std": 0.38466620445251465, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.6005223989486694, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9746074080467224, "rewards/VideoAccuracy/std": 0.4673185646533966, "step": 664, "train_speed(iter/s)": 0.017491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/mean_length": 520.6904907226562, "completions/min_length": 393.0, "entropy/max": 0.7578125, "entropy/mean": 0.365234375, "entropy/min": 0.138671875, "epoch": 0.665, "grad_norm": 1.066138602840599, "kl": 0.234375, "learning_rate": 5.138032638995315e-07, "loss": 0.002420675940811634, "memory(GiB)": 147.2, "reward": 2.034099578857422, "reward_std": 0.2014666646718979, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49243220686912537, "rewards/EvidenceHallucination/std": 0.47104111313819885, "rewards/Evidence_Num_Record/mean": 5.238095283508301, "rewards/Evidence_Num_Record/std": 2.721275806427002, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8689465522766113, "rewards/VideoAccuracy/std": 0.36233431100845337, "step": 665, "train_speed(iter/s)": 0.017487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/mean_length": 464.8095397949219, "completions/min_length": 318.0, "entropy/max": 1.03125, "entropy/mean": 0.435546875, "entropy/min": 0.193359375, "epoch": 0.666, "grad_norm": 1.089488383391917, "kl": 0.287109375, "learning_rate": 5.110327076793612e-07, "loss": 0.002939028199762106, "memory(GiB)": 147.2, "reward": 1.6147891283035278, "reward_std": 0.17048370838165283, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4267515540122986, "rewards/EvidenceHallucination/std": 0.44840237498283386, "rewards/Evidence_Num_Record/mean": 4.761904716491699, "rewards/Evidence_Num_Record/std": 1.2842293977737427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.47229599952697754, "rewards/VideoAccuracy/std": 0.42426127195358276, "step": 666, "train_speed(iter/s)": 0.017469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 614.0, "completions/mean_length": 451.26190185546875, "completions/min_length": 341.0, "entropy/max": 0.52734375, "entropy/mean": 0.43359375, "entropy/min": 0.31640625, "epoch": 0.667, "grad_norm": 1.37364539682683, "kl": 0.294921875, "learning_rate": 5.08267075354396e-07, "loss": 0.0029405485838651657, "memory(GiB)": 147.2, "reward": 2.1241273880004883, "reward_std": 0.2718643546104431, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.697299599647522, "rewards/EvidenceHallucination/std": 0.40521708130836487, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.148902416229248, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8846674561500549, "rewards/VideoAccuracy/std": 0.5274243354797363, "step": 667, "train_speed(iter/s)": 0.017473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 905.0, "completions/mean_length": 538.7380981445312, "completions/min_length": 315.0, "entropy/max": 1.6171875, "entropy/mean": 0.380859375, "entropy/min": 0.134765625, "epoch": 0.668, "grad_norm": 0.9613458390599403, "kl": 0.2275390625, "learning_rate": 5.055063947745233e-07, "loss": 0.0023003662936389446, "memory(GiB)": 147.2, "reward": 2.047619342803955, "reward_std": 0.21020303666591644, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29439035058021545, "rewards/EvidenceHallucination/std": 0.427455335855484, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 0.8692047595977783, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8554080724716187, "rewards/VideoAccuracy/std": 0.5128820538520813, "step": 668, "train_speed(iter/s)": 0.017472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 856.0, "completions/mean_length": 505.16668701171875, "completions/min_length": 353.0, "entropy/max": 0.57421875, "entropy/mean": 0.404296875, "entropy/min": 0.267578125, "epoch": 0.669, "grad_norm": 1.2010642006938999, "kl": 0.259765625, "learning_rate": 5.027506937397652e-07, "loss": 0.0026324428617954254, "memory(GiB)": 147.2, "reward": 1.830915927886963, "reward_std": 0.20380070805549622, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5770098567008972, "rewards/EvidenceHallucination/std": 0.4214893579483032, "rewards/Evidence_Num_Record/mean": 5.38095235824585, "rewards/Evidence_Num_Record/std": 2.1521670818328857, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.6631327867507935, "rewards/VideoAccuracy/std": 0.4034560024738312, "step": 669, "train_speed(iter/s)": 0.017474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 751.0, "completions/mean_length": 455.71429443359375, "completions/min_length": 202.0, "entropy/max": 0.70703125, "entropy/mean": 0.458984375, "entropy/min": 0.28515625, "epoch": 0.67, "grad_norm": 1.145327570121584, "kl": 0.296875, "learning_rate": 5.000000000000002e-07, "loss": 0.0029850280843675137, "memory(GiB)": 147.2, "reward": 1.5763933658599854, "reward_std": 0.1424618363380432, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4487646818161011, "rewards/EvidenceHallucination/std": 0.457754522562027, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.4283100366592407, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.45330697298049927, "rewards/VideoAccuracy/std": 0.3984241187572479, "step": 670, "train_speed(iter/s)": 0.017474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/mean_length": 514.8333129882812, "completions/min_length": 323.0, "entropy/max": 0.625, "entropy/mean": 0.306640625, "entropy/min": 0.15625, "epoch": 0.671, "grad_norm": 1.0521617393984861, "kl": 0.2216796875, "learning_rate": 4.972543412546841e-07, "loss": 0.0022550090216100216, "memory(GiB)": 147.2, "reward": 2.2052764892578125, "reward_std": 0.18084117770195007, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5524303913116455, "rewards/EvidenceHallucination/std": 0.40628620982170105, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.9865530133247375, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8995524048805237, "rewards/VideoAccuracy/std": 0.40781137347221375, "step": 671, "train_speed(iter/s)": 0.017474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2349.0, "completions/mean_length": 590.6666870117188, "completions/min_length": 329.0, "entropy/max": 1.9453125, "entropy/mean": 0.478515625, "entropy/min": 0.1435546875, "epoch": 0.672, "grad_norm": 1.0176091140296963, "kl": 0.2490234375, "learning_rate": 4.945137451525706e-07, "loss": 0.002639862708747387, "memory(GiB)": 147.2, "reward": 1.3738744258880615, "reward_std": 0.21433894336223602, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29613929986953735, "rewards/EvidenceHallucination/std": 0.43096432089805603, "rewards/Evidence_Num_Record/mean": 6.690476417541504, "rewards/Evidence_Num_Record/std": 5.019416332244873, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.28607502579689026, "rewards/VideoAccuracy/std": 0.4201844334602356, "step": 672, "train_speed(iter/s)": 0.017464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1103.0, "completions/mean_length": 545.7857055664062, "completions/min_length": 350.0, "entropy/max": 0.66796875, "entropy/mean": 0.431640625, "entropy/min": 0.263671875, "epoch": 0.673, "grad_norm": 1.0327069583898099, "kl": 0.2412109375, "learning_rate": 4.91778239291431e-07, "loss": 0.002442984376102686, "memory(GiB)": 147.2, "reward": 1.2752413749694824, "reward_std": 0.19907771050930023, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.190188929438591, "rewards/EvidenceHallucination/std": 0.3720424771308899, "rewards/Evidence_Num_Record/mean": 5.404761791229248, "rewards/Evidence_Num_Record/std": 3.485576629638672, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.2038702368736267, "rewards/VideoAccuracy/std": 0.33589303493499756, "step": 673, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.0, "completions/mean_length": 445.5714416503906, "completions/min_length": 342.0, "entropy/max": 0.53125, "entropy/mean": 0.34765625, "entropy/min": 0.169921875, "epoch": 0.674, "grad_norm": 1.1536689632597585, "kl": 0.287109375, "learning_rate": 4.890478512177795e-07, "loss": 0.002904066815972328, "memory(GiB)": 147.2, "reward": 2.2410874366760254, "reward_std": 0.1582295447587967, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.653382420539856, "rewards/EvidenceHallucination/std": 0.40857383608818054, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.8811485767364502, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9485061168670654, "rewards/VideoAccuracy/std": 0.4866093099117279, "step": 674, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1188.0, "completions/mean_length": 520.90478515625, "completions/min_length": 350.0, "entropy/max": 1.0859375, "entropy/mean": 0.49609375, "entropy/min": 0.2158203125, "epoch": 0.675, "grad_norm": 1.2732652889309997, "kl": 0.251953125, "learning_rate": 4.863226084265939e-07, "loss": 0.002592825796455145, "memory(GiB)": 147.2, "reward": 2.215259313583374, "reward_std": 0.0945897102355957, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6706887483596802, "rewards/EvidenceHallucination/std": 0.3234904706478119, "rewards/Evidence_Num_Record/mean": 4.904761791229248, "rewards/Evidence_Num_Record/std": 1.7364039421081543, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 1.014454960823059, "rewards/VideoAccuracy/std": 0.18321140110492706, "step": 675, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 824.0, "completions/mean_length": 464.0714416503906, "completions/min_length": 290.0, "entropy/max": 0.69921875, "entropy/mean": 0.453125, "entropy/min": 0.263671875, "epoch": 0.676, "grad_norm": 1.4533553837831945, "kl": 0.275390625, "learning_rate": 4.836025383610382e-07, "loss": 0.0027627265080809593, "memory(GiB)": 147.2, "reward": 1.8260008096694946, "reward_std": 0.19941721856594086, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6072930693626404, "rewards/EvidenceHallucination/std": 0.46310192346572876, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.6265795230865479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.637875497341156, "rewards/VideoAccuracy/std": 0.4270996153354645, "step": 676, "train_speed(iter/s)": 0.017467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1144.0, "completions/mean_length": 484.3809509277344, "completions/min_length": 367.0, "entropy/max": 0.7578125, "entropy/mean": 0.421875, "entropy/min": 0.1962890625, "epoch": 0.677, "grad_norm": 1.1999298873211424, "kl": 0.279296875, "learning_rate": 4.808876684121881e-07, "loss": 0.0028530347626656294, "memory(GiB)": 147.2, "reward": 1.7600337266921997, "reward_std": 0.11497774720191956, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4731876254081726, "rewards/EvidenceHallucination/std": 0.4828210771083832, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 2.1445987224578857, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008703470230103, "rewards/VideoAccuracy/mean": 0.579681932926178, "rewards/VideoAccuracy/std": 0.584205687046051, "step": 677, "train_speed(iter/s)": 0.017469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 624.0, "completions/mean_length": 470.8571472167969, "completions/min_length": 319.0, "entropy/max": 1.28125, "entropy/mean": 0.357421875, "entropy/min": 0.12353515625, "epoch": 0.678, "grad_norm": 1.1855086211602954, "kl": 0.2392578125, "learning_rate": 4.781780259187542e-07, "loss": 0.002429666928946972, "memory(GiB)": 147.2, "reward": 2.1749234199523926, "reward_std": 0.13399794697761536, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6821054220199585, "rewards/EvidenceHallucination/std": 0.3648480176925659, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.9320714473724365, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.905168890953064, "rewards/VideoAccuracy/std": 0.2352881133556366, "step": 678, "train_speed(iter/s)": 0.01747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 713.0, "completions/mean_length": 459.71429443359375, "completions/min_length": 292.0, "entropy/max": 1.515625, "entropy/mean": 0.46875, "entropy/min": 0.3203125, "epoch": 0.679, "grad_norm": 1.3761276944643144, "kl": 0.26171875, "learning_rate": 4.7547363816680564e-07, "loss": 0.002636173740029335, "memory(GiB)": 147.2, "reward": 1.8883533477783203, "reward_std": 0.22926215827465057, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6010059118270874, "rewards/EvidenceHallucination/std": 0.40532511472702026, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.148902416229248, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.7062475085258484, "rewards/VideoAccuracy/std": 0.39731258153915405, "step": 679, "train_speed(iter/s)": 0.017468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1857.0, "completions/mean_length": 522.1904907226562, "completions/min_length": 340.0, "entropy/max": 0.578125, "entropy/mean": 0.427734375, "entropy/min": 0.296875, "epoch": 0.68, "grad_norm": 1.2514499961252392, "kl": 0.26171875, "learning_rate": 4.727745323894975e-07, "loss": 0.0026679779402911663, "memory(GiB)": 147.2, "reward": 1.598029375076294, "reward_std": 0.24943912029266357, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3181222677230835, "rewards/EvidenceHallucination/std": 0.4199323356151581, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 1.9666203260421753, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.47250017523765564, "rewards/VideoAccuracy/std": 0.5534753203392029, "step": 680, "train_speed(iter/s)": 0.017471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 784.0, "completions/mean_length": 504.0952453613281, "completions/min_length": 330.0, "entropy/max": 0.484375, "entropy/mean": 0.283203125, "entropy/min": 0.1337890625, "epoch": 0.681, "grad_norm": 1.0238181065161005, "kl": 0.2275390625, "learning_rate": 4.700807357667952e-07, "loss": 0.0023045698180794716, "memory(GiB)": 147.2, "reward": 2.1714138984680176, "reward_std": 0.05831073969602585, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49484798312187195, "rewards/EvidenceHallucination/std": 0.41957953572273254, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.9084069728851318, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8724439740180969, "rewards/VideoAccuracy/std": 0.4067482352256775, "step": 681, "train_speed(iter/s)": 0.017472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/mean_length": 573.4761962890625, "completions/min_length": 379.0, "entropy/max": 1.7265625, "entropy/mean": 0.4609375, "entropy/min": 0.1962890625, "epoch": 0.682, "grad_norm": 0.9955981566865121, "kl": 0.251953125, "learning_rate": 4.673922754252001e-07, "loss": 0.002564347116276622, "memory(GiB)": 147.2, "reward": 1.5866285562515259, "reward_std": 0.14283575117588043, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3923703134059906, "rewards/EvidenceHallucination/std": 0.4100193977355957, "rewards/Evidence_Num_Record/mean": 6.0714287757873535, "rewards/Evidence_Num_Record/std": 2.699528932571411, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.48434481024742126, "rewards/VideoAccuracy/std": 0.4688479006290436, "step": 682, "train_speed(iter/s)": 0.017467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1168.0, "completions/mean_length": 481.21429443359375, "completions/min_length": 286.0, "entropy/max": 0.79296875, "entropy/mean": 0.427734375, "entropy/min": 0.12890625, "epoch": 0.683, "grad_norm": 1.4492171218624827, "kl": 0.259765625, "learning_rate": 4.6470917843747845e-07, "loss": 0.0026228614151477814, "memory(GiB)": 147.2, "reward": 1.453622579574585, "reward_std": 0.37589478492736816, "rewards/EvidenceFormat/mean": 0.9523809552192688, "rewards/EvidenceFormat/std": 0.21554027497768402, "rewards/EvidenceHallucination/mean": 0.30742937326431274, "rewards/EvidenceHallucination/std": 0.4408752918243408, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.4010117053985596, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.38737478852272034, "rewards/VideoAccuracy/std": 0.48056450486183167, "step": 683, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 768.0, "completions/mean_length": 516.0238037109375, "completions/min_length": 351.0, "entropy/max": 0.47265625, "entropy/mean": 0.3125, "entropy/min": 0.1455078125, "epoch": 0.684, "grad_norm": 1.1759029669622152, "kl": 0.263671875, "learning_rate": 4.620314718223876e-07, "loss": 0.0026608225889503956, "memory(GiB)": 147.2, "reward": 2.253122568130493, "reward_std": 0.3057974874973297, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6317847371101379, "rewards/EvidenceHallucination/std": 0.4286155700683594, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.2087563276290894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9600986242294312, "rewards/VideoAccuracy/std": 0.4437883496284485, "step": 684, "train_speed(iter/s)": 0.017456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 799.0, "completions/mean_length": 505.5952453613281, "completions/min_length": 318.0, "entropy/max": 1.25, "entropy/mean": 0.46484375, "entropy/min": 0.1259765625, "epoch": 0.685, "grad_norm": 1.2982480360663333, "kl": 0.259765625, "learning_rate": 4.5935918254440274e-07, "loss": 0.0026418499182909727, "memory(GiB)": 147.2, "reward": 2.0587644577026367, "reward_std": 0.16774247586727142, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7660202980041504, "rewards/EvidenceHallucination/std": 0.3533811569213867, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.8377888202667236, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8388935327529907, "rewards/VideoAccuracy/std": 0.3307531774044037, "step": 685, "train_speed(iter/s)": 0.017459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 928.0, "completions/mean_length": 483.6428527832031, "completions/min_length": 312.0, "entropy/max": 0.69140625, "entropy/mean": 0.44921875, "entropy/min": 0.26953125, "epoch": 0.686, "grad_norm": 1.086242695905268, "kl": 0.259765625, "learning_rate": 4.566923375134472e-07, "loss": 0.002612657379359007, "memory(GiB)": 147.2, "reward": 1.5800120830535889, "reward_std": 0.1780678629875183, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4063895642757416, "rewards/EvidenceHallucination/std": 0.434430867433548, "rewards/Evidence_Num_Record/mean": 4.642857074737549, "rewards/Evidence_Num_Record/std": 1.5899291038513184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.43682926893234253, "rewards/VideoAccuracy/std": 0.42221078276634216, "step": 686, "train_speed(iter/s)": 0.017457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/mean_length": 464.23809814453125, "completions/min_length": 343.0, "entropy/max": 0.515625, "entropy/mean": 0.41796875, "entropy/min": 0.287109375, "epoch": 0.687, "grad_norm": 1.1617117839348086, "kl": 0.287109375, "learning_rate": 4.540309635846209e-07, "loss": 0.002888549119234085, "memory(GiB)": 147.2, "reward": 1.6990382671356201, "reward_std": 0.2361672967672348, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39703914523124695, "rewards/EvidenceHallucination/std": 0.45230522751808167, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.1506701707839966, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.5196302533149719, "rewards/VideoAccuracy/std": 0.5318339467048645, "step": 687, "train_speed(iter/s)": 0.01746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 807.0, "completions/mean_length": 576.1428833007812, "completions/min_length": 339.0, "entropy/max": 1.8203125, "entropy/mean": 0.35546875, "entropy/min": 0.130859375, "epoch": 0.688, "grad_norm": 1.0048234365439954, "kl": 0.2119140625, "learning_rate": 4.513750875579303e-07, "loss": 0.0021487295161932707, "memory(GiB)": 147.2, "reward": 2.0577375888824463, "reward_std": 0.2570165693759918, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6128485202789307, "rewards/EvidenceHallucination/std": 0.4469565153121948, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 0.9912508726119995, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8018344640731812, "rewards/VideoAccuracy/std": 0.4513843059539795, "step": 688, "train_speed(iter/s)": 0.017459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1711.0, "completions/mean_length": 561.547607421875, "completions/min_length": 364.0, "entropy/max": 0.80859375, "entropy/mean": 0.39453125, "entropy/min": 0.154296875, "epoch": 0.689, "grad_norm": 1.1651389191194548, "kl": 0.24609375, "learning_rate": 4.487247361780169e-07, "loss": 0.0025680987164378166, "memory(GiB)": 147.2, "reward": 1.6228870153427124, "reward_std": 0.1611291617155075, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40475350618362427, "rewards/EvidenceHallucination/std": 0.4765009880065918, "rewards/Evidence_Num_Record/mean": 6.642857074737549, "rewards/Evidence_Num_Record/std": 4.705064296722412, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.48955535888671875, "rewards/VideoAccuracy/std": 0.4310154318809509, "step": 689, "train_speed(iter/s)": 0.01745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/mean_length": 460.7857360839844, "completions/min_length": 343.0, "entropy/max": 0.7734375, "entropy/mean": 0.44921875, "entropy/min": 0.322265625, "epoch": 0.69, "grad_norm": 1.2929031857591144, "kl": 0.29296875, "learning_rate": 4.460799361338897e-07, "loss": 0.0029428384732455015, "memory(GiB)": 147.2, "reward": 1.5768420696258545, "reward_std": 0.29188913106918335, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31747567653656006, "rewards/EvidenceHallucination/std": 0.4247772693634033, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.5294249057769775, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.48001348972320557, "rewards/VideoAccuracy/std": 0.6256462335586548, "step": 690, "train_speed(iter/s)": 0.017453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 886.0, "completions/mean_length": 520.1666870117188, "completions/min_length": 350.0, "entropy/max": 0.51953125, "entropy/mean": 0.294921875, "entropy/min": 0.115234375, "epoch": 0.691, "grad_norm": 1.0833730957131797, "kl": 0.23828125, "learning_rate": 4.4344071405865645e-07, "loss": 0.002413667505607009, "memory(GiB)": 147.2, "reward": 1.9608988761901855, "reward_std": 0.2161414921283722, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5640000700950623, "rewards/EvidenceHallucination/std": 0.47513172030448914, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 0.9320714473724365, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.6576226949691772, "rewards/VideoAccuracy/std": 0.36465051770210266, "step": 691, "train_speed(iter/s)": 0.017455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 729.0, "completions/mean_length": 506.2857360839844, "completions/min_length": 295.0, "entropy/max": 2.03125, "entropy/mean": 0.60546875, "entropy/min": 0.30078125, "epoch": 0.692, "grad_norm": 1.0704807173778, "kl": 0.275390625, "learning_rate": 4.408070965292533e-07, "loss": 0.0028132570441812277, "memory(GiB)": 147.2, "reward": 1.4591788053512573, "reward_std": 0.24840104579925537, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2568591833114624, "rewards/EvidenceHallucination/std": 0.37475547194480896, "rewards/Evidence_Num_Record/mean": 5.404761791229248, "rewards/Evidence_Num_Record/std": 1.4492979049682617, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3744736611843109, "rewards/VideoAccuracy/std": 0.45563003420829773, "step": 692, "train_speed(iter/s)": 0.017459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/mean_length": 427.3333435058594, "completions/min_length": 279.0, "entropy/max": 0.65625, "entropy/mean": 0.4453125, "entropy/min": 0.3125, "epoch": 0.693, "grad_norm": 1.245436341733027, "kl": 0.275390625, "learning_rate": 4.381791100661798e-07, "loss": 0.002816341584548354, "memory(GiB)": 147.2, "reward": 1.4305661916732788, "reward_std": 0.19407030940055847, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29068663716316223, "rewards/EvidenceHallucination/std": 0.423583060503006, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 1.1466256380081177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3390955328941345, "rewards/VideoAccuracy/std": 0.46505600214004517, "step": 693, "train_speed(iter/s)": 0.017456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 672.0, "completions/mean_length": 491.71429443359375, "completions/min_length": 352.0, "entropy/max": 0.51171875, "entropy/mean": 0.353515625, "entropy/min": 0.15234375, "epoch": 0.694, "grad_norm": 1.3098769771139183, "kl": 0.267578125, "learning_rate": 4.35556781133231e-07, "loss": 0.0026831082068383694, "memory(GiB)": 147.2, "reward": 2.2679808139801025, "reward_std": 0.11200588941574097, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7510750889778137, "rewards/EvidenceHallucination/std": 0.3746412992477417, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.7963330745697021, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9558608531951904, "rewards/VideoAccuracy/std": 0.2994694709777832, "step": 694, "train_speed(iter/s)": 0.017458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 775.0, "completions/mean_length": 565.2619018554688, "completions/min_length": 384.0, "entropy/max": 0.72265625, "entropy/mean": 0.39453125, "entropy/min": 0.15625, "epoch": 0.695, "grad_norm": 1.071990321028825, "kl": 0.2294921875, "learning_rate": 4.3294013613722937e-07, "loss": 0.002319510094821453, "memory(GiB)": 147.2, "reward": 1.5056880712509155, "reward_std": 0.33209341764450073, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28727561235427856, "rewards/EvidenceHallucination/std": 0.42239242792129517, "rewards/Evidence_Num_Record/mean": 5.476190567016602, "rewards/Evidence_Num_Record/std": 2.015042304992676, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.38156628608703613, "rewards/VideoAccuracy/std": 0.4642001688480377, "step": 695, "train_speed(iter/s)": 0.017465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2371.0, "completions/mean_length": 559.7857055664062, "completions/min_length": 301.0, "entropy/max": 1.0078125, "entropy/mean": 0.4453125, "entropy/min": 0.1533203125, "epoch": 0.696, "grad_norm": 1.2218640902082203, "kl": 0.2421875, "learning_rate": 4.303292014277612e-07, "loss": 0.0025270027108490467, "memory(GiB)": 147.2, "reward": 1.5773190259933472, "reward_std": 0.14106562733650208, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3630666732788086, "rewards/EvidenceHallucination/std": 0.44932857155799866, "rewards/Evidence_Num_Record/mean": 5.61904764175415, "rewards/Evidence_Num_Record/std": 5.103572845458984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.45232465863227844, "rewards/VideoAccuracy/std": 0.4420204162597656, "step": 696, "train_speed(iter/s)": 0.017443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 655.0, "completions/mean_length": 438.4285888671875, "completions/min_length": 292.0, "entropy/max": 0.54296875, "entropy/mean": 0.40625, "entropy/min": 0.29296875, "epoch": 0.697, "grad_norm": 1.1802489020241533, "kl": 0.298828125, "learning_rate": 4.277240032969105e-07, "loss": 0.0030094156973063946, "memory(GiB)": 147.2, "reward": 1.8751962184906006, "reward_std": 0.1341707706451416, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5513318777084351, "rewards/EvidenceHallucination/std": 0.4631679058074951, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 0.8942323923110962, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6649297475814819, "rewards/VideoAccuracy/std": 0.5983375906944275, "step": 697, "train_speed(iter/s)": 0.017447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 770.0, "completions/mean_length": 552.7857055664062, "completions/min_length": 356.0, "entropy/max": 1.109375, "entropy/mean": 0.36328125, "entropy/min": 0.15234375, "epoch": 0.698, "grad_norm": 0.8709243260210473, "kl": 0.2255859375, "learning_rate": 4.251245679789928e-07, "loss": 0.0022979374043643475, "memory(GiB)": 147.2, "reward": 2.063358783721924, "reward_std": 0.1354062259197235, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5890229940414429, "rewards/EvidenceHallucination/std": 0.4311382472515106, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.783913254737854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.8217445611953735, "rewards/VideoAccuracy/std": 0.4496909976005554, "step": 698, "train_speed(iter/s)": 0.017446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 805.0, "completions/mean_length": 518.8809814453125, "completions/min_length": 334.0, "entropy/max": 1.7265625, "entropy/mean": 0.484375, "entropy/min": 0.228515625, "epoch": 0.699, "grad_norm": 1.1510972726493807, "kl": 0.26171875, "learning_rate": 4.2253092165029326e-07, "loss": 0.0026733647100627422, "memory(GiB)": 147.2, "reward": 1.6124541759490967, "reward_std": 0.28479164838790894, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41173335909843445, "rewards/EvidenceHallucination/std": 0.44702908396720886, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 1.4650397300720215, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.4777264893054962, "rewards/VideoAccuracy/std": 0.4453105330467224, "step": 699, "train_speed(iter/s)": 0.017444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2616.0, "completions/mean_length": 513.9761962890625, "completions/min_length": 291.0, "entropy/max": 0.75, "entropy/mean": 0.4296875, "entropy/min": 0.1279296875, "epoch": 0.7, "grad_norm": 1.2691967916036708, "kl": 0.251953125, "learning_rate": 4.1994309042880193e-07, "loss": 0.002594948513433337, "memory(GiB)": 147.2, "reward": 1.737424612045288, "reward_std": 0.26810336112976074, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45363911986351013, "rewards/EvidenceHallucination/std": 0.4442523717880249, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 3.351287364959717, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6133636236190796, "rewards/VideoAccuracy/std": 0.5350895524024963, "step": 700, "train_speed(iter/s)": 0.017432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1964.0, "completions/mean_length": 573.952392578125, "completions/min_length": 329.0, "entropy/max": 1.015625, "entropy/mean": 0.447265625, "entropy/min": 0.1416015625, "epoch": 0.701, "grad_norm": 1.0853250853365422, "kl": 0.2578125, "learning_rate": 4.173611003739498e-07, "loss": 0.002697226358577609, "memory(GiB)": 136.59, "reward": 1.6427127122879028, "reward_std": 0.10287611186504364, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4754685163497925, "rewards/EvidenceHallucination/std": 0.4412776529788971, "rewards/Evidence_Num_Record/mean": 6.309524059295654, "rewards/Evidence_Num_Record/std": 4.425469875335693, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5476190447807312, "rewards/VideoAccuracy/std": 0.5037605166435242, "step": 701, "train_speed(iter/s)": 3.892747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1631.0, "completions/mean_length": 590.0714111328125, "completions/min_length": 301.0, "entropy/max": 1.6875, "entropy/mean": 0.5, "entropy/min": 0.1875, "epoch": 0.702, "grad_norm": 1.0887211150611817, "kl": 0.2470703125, "learning_rate": 4.1478497748634876e-07, "loss": 0.0025451509281992912, "memory(GiB)": 136.59, "reward": 1.436562418937683, "reward_std": 0.3272863030433655, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31658434867858887, "rewards/EvidenceHallucination/std": 0.41431131958961487, "rewards/Evidence_Num_Record/mean": 6.1666669845581055, "rewards/Evidence_Num_Record/std": 1.898865818977356, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.33991220593452454, "rewards/VideoAccuracy/std": 0.44950252771377563, "step": 702, "train_speed(iter/s)": 2.872482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1660.0, "completions/mean_length": 565.952392578125, "completions/min_length": 332.0, "entropy/max": 0.7109375, "entropy/mean": 0.470703125, "entropy/min": 0.279296875, "epoch": 0.703, "grad_norm": 1.1977387701297668, "kl": 0.2470703125, "learning_rate": 4.1221474770752696e-07, "loss": 0.0025409169029444456, "memory(GiB)": 136.59, "reward": 1.3434559106826782, "reward_std": 0.3675708770751953, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22899995744228363, "rewards/EvidenceHallucination/std": 0.3948313295841217, "rewards/Evidence_Num_Record/mean": 5.952381134033203, "rewards/Evidence_Num_Record/std": 4.444521903991699, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.26432254910469055, "rewards/VideoAccuracy/std": 0.36528319120407104, "step": 703, "train_speed(iter/s)": 2.33247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 890.0, "completions/mean_length": 521.6190795898438, "completions/min_length": 371.0, "entropy/max": 0.6484375, "entropy/mean": 0.34765625, "entropy/min": 0.134765625, "epoch": 0.704, "grad_norm": 1.1878908628763787, "kl": 0.25390625, "learning_rate": 4.096504369196704e-07, "loss": 0.0025660318788141012, "memory(GiB)": 136.59, "reward": 2.0264220237731934, "reward_std": 0.2580689787864685, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4410540759563446, "rewards/EvidenceHallucination/std": 0.4718802869319916, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.1024731397628784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.761904776096344, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.7858304381370544, "rewards/VideoAccuracy/std": 0.5604264736175537, "step": 704, "train_speed(iter/s)": 2.056455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 828.0, "completions/mean_length": 546.7619018554688, "completions/min_length": 322.0, "entropy/max": 1.1171875, "entropy/mean": 0.40234375, "entropy/min": 0.1689453125, "epoch": 0.705, "grad_norm": 0.8909335778267706, "kl": 0.2421875, "learning_rate": 4.070920709453597e-07, "loss": 0.0024710441939532757, "memory(GiB)": 137.01, "reward": 1.8602908849716187, "reward_std": 0.026720553636550903, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5794177055358887, "rewards/EvidenceHallucination/std": 0.4214595556259155, "rewards/Evidence_Num_Record/mean": 5.261904716491699, "rewards/Evidence_Num_Record/std": 2.0607781410217285, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6777406334877014, "rewards/VideoAccuracy/std": 0.4865841567516327, "step": 705, "train_speed(iter/s)": 1.808727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 957.0, "completions/mean_length": 537.6904907226562, "completions/min_length": 314.0, "entropy/max": 0.7734375, "entropy/mean": 0.416015625, "entropy/min": 0.2578125, "epoch": 0.706, "grad_norm": 1.1417716671268525, "kl": 0.24609375, "learning_rate": 4.0453967554731207e-07, "loss": 0.0025275161024183035, "memory(GiB)": 137.02, "reward": 1.4983958005905151, "reward_std": 0.1210184395313263, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.31716296076774597, "rewards/EvidenceHallucination/std": 0.437943696975708, "rewards/Evidence_Num_Record/mean": 5.547619342803955, "rewards/Evidence_Num_Record/std": 2.3808419704437256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.36829641461372375, "rewards/VideoAccuracy/std": 0.40108880400657654, "step": 706, "train_speed(iter/s)": 1.640966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 600.0, "completions/mean_length": 435.9761962890625, "completions/min_length": 309.0, "entropy/max": 0.6171875, "entropy/mean": 0.4453125, "entropy/min": 0.251953125, "epoch": 0.707, "grad_norm": 1.1649223404096742, "kl": 0.28515625, "learning_rate": 4.019932764281211e-07, "loss": 0.0028660595417022705, "memory(GiB)": 137.02, "reward": 1.719510555267334, "reward_std": 0.3151416778564453, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42755359411239624, "rewards/EvidenceHallucination/std": 0.4588277339935303, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.0339757204055786, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.5435237288475037, "rewards/VideoAccuracy/std": 0.5430207848548889, "step": 707, "train_speed(iter/s)": 1.511853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2298.0, "completions/mean_length": 568.2619018554688, "completions/min_length": 344.0, "entropy/max": 0.5625, "entropy/mean": 0.25390625, "entropy/min": 0.11474609375, "epoch": 0.708, "grad_norm": 0.9712050959951763, "kl": 0.2119140625, "learning_rate": 3.9945289922999705e-07, "loss": 0.0022306388709694147, "memory(GiB)": 137.02, "reward": 2.0796968936920166, "reward_std": 0.157607302069664, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.17472554743289948, "rewards/EvidenceHallucination/std": 0.29664650559425354, "rewards/Evidence_Num_Record/mean": 4.928571701049805, "rewards/Evidence_Num_Record/std": 5.274849891662598, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.9161801934242249, "rewards/VideoAccuracy/std": 0.3562900424003601, "step": 708, "train_speed(iter/s)": 1.233763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1125.0, "completions/mean_length": 519.2380981445312, "completions/min_length": 332.0, "entropy/max": 0.6171875, "entropy/mean": 0.431640625, "entropy/min": 0.255859375, "epoch": 0.709, "grad_norm": 1.118068943990787, "kl": 0.267578125, "learning_rate": 3.9691856953451043e-07, "loss": 0.0027058953419327736, "memory(GiB)": 137.02, "reward": 1.5037561655044556, "reward_std": 0.2950292229652405, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28459712862968445, "rewards/EvidenceHallucination/std": 0.4152517020702362, "rewards/Evidence_Num_Record/mean": 5.261904716491699, "rewards/Evidence_Num_Record/std": 2.8632194995880127, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.3849318325519562, "rewards/VideoAccuracy/std": 0.4115446209907532, "step": 709, "train_speed(iter/s)": 1.151477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/mean_length": 521.9285888671875, "completions/min_length": 344.0, "entropy/max": 0.75390625, "entropy/mean": 0.44921875, "entropy/min": 0.26171875, "epoch": 0.71, "grad_norm": 1.1740647837126024, "kl": 0.251953125, "learning_rate": 3.943903128623335e-07, "loss": 0.002540436340495944, "memory(GiB)": 137.02, "reward": 1.4951140880584717, "reward_std": 0.16538338363170624, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2859345078468323, "rewards/EvidenceHallucination/std": 0.3980078399181366, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 0.9678334593772888, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.404593825340271, "rewards/VideoAccuracy/std": 0.5376194715499878, "step": 710, "train_speed(iter/s)": 1.062856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 924.0, "completions/mean_length": 514.7380981445312, "completions/min_length": 364.0, "entropy/max": 0.58984375, "entropy/mean": 0.32421875, "entropy/min": 0.1005859375, "epoch": 0.711, "grad_norm": 1.076386212525606, "kl": 0.2294921875, "learning_rate": 3.918681546729822e-07, "loss": 0.0023401621729135513, "memory(GiB)": 137.02, "reward": 1.8365966081619263, "reward_std": 0.07770262658596039, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32080966234207153, "rewards/EvidenceHallucination/std": 0.4606294333934784, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.6176836490631104, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5724345445632935, "rewards/VideoAccuracy/std": 0.4593268632888794, "step": 711, "train_speed(iter/s)": 1.009283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/mean_length": 572.8095092773438, "completions/min_length": 286.0, "entropy/max": 1.25, "entropy/mean": 0.49609375, "entropy/min": 0.28125, "epoch": 0.712, "grad_norm": 1.4269730629808097, "kl": 0.259765625, "learning_rate": 3.8935212036456175e-07, "loss": 0.0026799244806170464, "memory(GiB)": 137.02, "reward": 1.976933479309082, "reward_std": 0.23799024522304535, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6350836753845215, "rewards/EvidenceHallucination/std": 0.3273024559020996, "rewards/Evidence_Num_Record/mean": 6.452381134033203, "rewards/Evidence_Num_Record/std": 4.712217330932617, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8165834546089172, "rewards/VideoAccuracy/std": 0.3730264902114868, "step": 712, "train_speed(iter/s)": 0.941002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 740.0, "completions/mean_length": 488.8095397949219, "completions/min_length": 345.0, "entropy/max": 0.6484375, "entropy/mean": 0.455078125, "entropy/min": 0.25, "epoch": 0.713, "grad_norm": 1.3572604623602544, "kl": 0.271484375, "learning_rate": 3.868422352735102e-07, "loss": 0.0027485534083098173, "memory(GiB)": 137.02, "reward": 1.7807960510253906, "reward_std": 0.3023853003978729, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5612412691116333, "rewards/EvidenceHallucination/std": 0.43778860569000244, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 1.4362164735794067, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.639976441860199, "rewards/VideoAccuracy/std": 0.4260217845439911, "step": 713, "train_speed(iter/s)": 0.877992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 961.0, "completions/mean_length": 534.7857055664062, "completions/min_length": 360.0, "entropy/max": 0.53125, "entropy/mean": 0.326171875, "entropy/min": 0.123046875, "epoch": 0.714, "grad_norm": 1.1692359671507124, "kl": 0.2490234375, "learning_rate": 3.843385246743417e-07, "loss": 0.0025063527282327414, "memory(GiB)": 137.02, "reward": 2.2322938442230225, "reward_std": 0.14283570647239685, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.658531904220581, "rewards/EvidenceHallucination/std": 0.4137778580188751, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.7392277121543884, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9339207410812378, "rewards/VideoAccuracy/std": 0.4797661602497101, "step": 714, "train_speed(iter/s)": 0.832326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1641.0, "completions/mean_length": 593.452392578125, "completions/min_length": 354.0, "entropy/max": 3.0625, "entropy/mean": 0.48828125, "entropy/min": 0.14453125, "epoch": 0.715, "grad_norm": 1.181223745862962, "kl": 0.2255859375, "learning_rate": 3.818410137793947e-07, "loss": 0.002410789020359516, "memory(GiB)": 137.02, "reward": 1.8372621536254883, "reward_std": 0.2797810137271881, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.498088538646698, "rewards/EvidenceHallucination/std": 0.36788809299468994, "rewards/Evidence_Num_Record/mean": 5.904761791229248, "rewards/Evidence_Num_Record/std": 4.360630512237549, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6709778308868408, "rewards/VideoAccuracy/std": 0.4687010943889618, "step": 715, "train_speed(iter/s)": 0.781068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1877.0, "completions/mean_length": 493.7857360839844, "completions/min_length": 257.0, "entropy/max": 0.83203125, "entropy/mean": 0.41796875, "entropy/min": 0.1513671875, "epoch": 0.716, "grad_norm": 0.9383627435068316, "kl": 0.265625, "learning_rate": 3.7934972773857634e-07, "loss": 0.0028080097399652004, "memory(GiB)": 137.02, "reward": 1.4566277265548706, "reward_std": 0.08022846281528473, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27737775444984436, "rewards/EvidenceHallucination/std": 0.4242125153541565, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.9284530878067017, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.3392474055290222, "rewards/VideoAccuracy/std": 0.3877919316291809, "step": 716, "train_speed(iter/s)": 0.730442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 616.0, "completions/mean_length": 463.1428527832031, "completions/min_length": 319.0, "entropy/max": 0.84375, "entropy/mean": 0.427734375, "entropy/min": 0.287109375, "epoch": 0.717, "grad_norm": 1.183539422038026, "kl": 0.265625, "learning_rate": 3.7686469163910883e-07, "loss": 0.0026837345212697983, "memory(GiB)": 137.02, "reward": 2.0717267990112305, "reward_std": 0.3083031177520752, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5801745653152466, "rewards/EvidenceHallucination/std": 0.44655001163482666, "rewards/Evidence_Num_Record/mean": 4.547619342803955, "rewards/Evidence_Num_Record/std": 1.1305595636367798, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.85569167137146, "rewards/VideoAccuracy/std": 0.6598480343818665, "step": 717, "train_speed(iter/s)": 0.704933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/mean_length": 560.0238037109375, "completions/min_length": 268.0, "entropy/max": 1.9453125, "entropy/mean": 0.37890625, "entropy/min": 0.10400390625, "epoch": 0.718, "grad_norm": 1.174024215201866, "kl": 0.2255859375, "learning_rate": 3.7438593050527846e-07, "loss": 0.0023144427686929703, "memory(GiB)": 137.02, "reward": 1.712986946105957, "reward_std": 0.1705286204814911, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4080731272697449, "rewards/EvidenceHallucination/std": 0.45950257778167725, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 1.361491322517395, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5476190447807312, "rewards/HonestTime/std": 0.5037605166435242, "rewards/VideoAccuracy/mean": 0.5218484997749329, "rewards/VideoAccuracy/std": 0.3351951837539673, "step": 718, "train_speed(iter/s)": 0.663054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2105.0, "completions/mean_length": 543.1428833007812, "completions/min_length": 265.0, "entropy/max": 1.0234375, "entropy/mean": 0.466796875, "entropy/min": 0.1669921875, "epoch": 0.719, "grad_norm": 1.0306257671320522, "kl": 0.2431640625, "learning_rate": 3.719134692981826e-07, "loss": 0.0025380700826644897, "memory(GiB)": 137.02, "reward": 1.3437881469726562, "reward_std": 0.03641896694898605, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20182187855243683, "rewards/EvidenceHallucination/std": 0.392501562833786, "rewards/Evidence_Num_Record/mean": 5.333333492279053, "rewards/Evidence_Num_Record/std": 3.7329559326171875, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.24151895940303802, "rewards/VideoAccuracy/std": 0.37862592935562134, "step": 719, "train_speed(iter/s)": 0.627096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 641.0, "completions/mean_length": 448.90478515625, "completions/min_length": 358.0, "entropy/max": 0.5859375, "entropy/mean": 0.447265625, "entropy/min": 0.33203125, "epoch": 0.72, "grad_norm": 1.224799165841684, "kl": 0.267578125, "learning_rate": 3.694473329154778e-07, "loss": 0.002683891449123621, "memory(GiB)": 137.02, "reward": 1.6364336013793945, "reward_std": 0.3253479599952698, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35876768827438354, "rewards/EvidenceHallucination/std": 0.4279513955116272, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.9093654751777649, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5361085534095764, "rewards/VideoAccuracy/std": 0.6426951289176941, "step": 720, "train_speed(iter/s)": 0.605893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 950.0, "completions/mean_length": 570.5714111328125, "completions/min_length": 408.0, "entropy/max": 0.486328125, "entropy/mean": 0.267578125, "entropy/min": 0.1181640625, "epoch": 0.721, "grad_norm": 0.9768702301605966, "kl": 0.2080078125, "learning_rate": 3.6698754619112973e-07, "loss": 0.0021096975542604923, "memory(GiB)": 137.02, "reward": 2.3190793991088867, "reward_std": 0.10299627482891083, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3182400166988373, "rewards/EvidenceHallucination/std": 0.4300622045993805, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.0473682880401611, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0554314851760864, "rewards/VideoAccuracy/std": 0.16592219471931458, "step": 721, "train_speed(iter/s)": 0.56258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/mean_length": 621.4761962890625, "completions/min_length": 382.0, "entropy/max": 0.765625, "entropy/mean": 0.4453125, "entropy/min": 0.228515625, "epoch": 0.722, "grad_norm": 1.445071191813976, "kl": 0.23046875, "learning_rate": 3.6453413389516385e-07, "loss": 0.0023926938883960247, "memory(GiB)": 137.02, "reward": 2.033181667327881, "reward_std": 0.12473130226135254, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7227866649627686, "rewards/EvidenceHallucination/std": 0.2981109917163849, "rewards/Evidence_Num_Record/mean": 7.0714287757873535, "rewards/Evidence_Num_Record/std": 3.9471688270568848, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8552910089492798, "rewards/VideoAccuracy/std": 0.32602179050445557, "step": 722, "train_speed(iter/s)": 0.544616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/mean_length": 492.19049072265625, "completions/min_length": 317.0, "entropy/max": 0.59375, "entropy/mean": 0.447265625, "entropy/min": 0.275390625, "epoch": 0.723, "grad_norm": 1.2170647391313107, "kl": 0.265625, "learning_rate": 3.62087120733415e-07, "loss": 0.0026650577783584595, "memory(GiB)": 137.02, "reward": 1.5765604972839355, "reward_std": 0.25286492705345154, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39277371764183044, "rewards/EvidenceHallucination/std": 0.4677566885948181, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.3657498359680176, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.4075295329093933, "rewards/VideoAccuracy/std": 0.3982459306716919, "step": 723, "train_speed(iter/s)": 0.529228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/mean_length": 539.1190795898438, "completions/min_length": 355.0, "entropy/max": 0.56640625, "entropy/mean": 0.349609375, "entropy/min": 0.095703125, "epoch": 0.724, "grad_norm": 1.1063410984775115, "kl": 0.24609375, "learning_rate": 3.596465313472777e-07, "loss": 0.002490151673555374, "memory(GiB)": 137.02, "reward": 1.788365364074707, "reward_std": 0.16274519264698029, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4031307101249695, "rewards/EvidenceHallucination/std": 0.4470342695713043, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.0779707431793213, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.738095223903656, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.5601202249526978, "rewards/VideoAccuracy/std": 0.5124112367630005, "step": 724, "train_speed(iter/s)": 0.508674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2080.0, "completions/mean_length": 616.5952758789062, "completions/min_length": 336.0, "entropy/max": 1.109375, "entropy/mean": 0.353515625, "entropy/min": 0.166015625, "epoch": 0.725, "grad_norm": 1.208381710184627, "kl": 0.22265625, "learning_rate": 3.5721239031346063e-07, "loss": 0.0023550393525511026, "memory(GiB)": 137.02, "reward": 2.0842397212982178, "reward_std": 0.2013828456401825, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6131070256233215, "rewards/EvidenceHallucination/std": 0.3226134181022644, "rewards/Evidence_Num_Record/mean": 6.404761791229248, "rewards/Evidence_Num_Record/std": 6.0164794921875, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8949514031410217, "rewards/VideoAccuracy/std": 0.29472899436950684, "step": 725, "train_speed(iter/s)": 0.480815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 719.0, "completions/mean_length": 507.1190490722656, "completions/min_length": 332.0, "entropy/max": 0.7578125, "entropy/mean": 0.4375, "entropy/min": 0.2451171875, "epoch": 0.726, "grad_norm": 1.3085967014885909, "kl": 0.259765625, "learning_rate": 3.5478472214373713e-07, "loss": 0.0026305762585252523, "memory(GiB)": 137.02, "reward": 1.688267469406128, "reward_std": 0.15720278024673462, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43249648809432983, "rewards/EvidenceHallucination/std": 0.44495826959609985, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 1.45649254322052, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.5160539150238037, "rewards/VideoAccuracy/std": 0.3884340226650238, "step": 726, "train_speed(iter/s)": 0.468899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 435.26190185546875, "completions/min_length": 201.0, "entropy/max": 0.7578125, "entropy/mean": 0.466796875, "entropy/min": 0.26953125, "epoch": 0.727, "grad_norm": 1.4513180977801843, "kl": 0.291015625, "learning_rate": 3.523635512846981e-07, "loss": 0.002929308917373419, "memory(GiB)": 137.02, "reward": 1.5991050004959106, "reward_std": 0.29706868529319763, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2844417691230774, "rewards/EvidenceHallucination/std": 0.4057222902774811, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.3216679096221924, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.44221657514572144, "rewards/VideoAccuracy/std": 0.4839160740375519, "step": 727, "train_speed(iter/s)": 0.457833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/mean_length": 604.9285888671875, "completions/min_length": 365.0, "entropy/max": 0.69140625, "entropy/mean": 0.296875, "entropy/min": 0.12158203125, "epoch": 0.728, "grad_norm": 0.8739823875491329, "kl": 0.20703125, "learning_rate": 3.4994890211750747e-07, "loss": 0.002132317516952753, "memory(GiB)": 137.02, "reward": 1.9869023561477661, "reward_std": 0.14404073357582092, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5619248151779175, "rewards/EvidenceHallucination/std": 0.43674349784851074, "rewards/Evidence_Num_Record/mean": 5.095238208770752, "rewards/Evidence_Num_Record/std": 3.962073564529419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.7459458708763123, "rewards/VideoAccuracy/std": 0.4712871015071869, "step": 728, "train_speed(iter/s)": 0.440075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1148.0, "completions/mean_length": 535.5952758789062, "completions/min_length": 386.0, "entropy/max": 0.68359375, "entropy/mean": 0.4609375, "entropy/min": 0.2578125, "epoch": 0.729, "grad_norm": 1.282961691054857, "kl": 0.251953125, "learning_rate": 3.4754079895765596e-07, "loss": 0.002549453405663371, "memory(GiB)": 137.02, "reward": 1.6989330053329468, "reward_std": 0.23000681400299072, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45383572578430176, "rewards/EvidenceHallucination/std": 0.44538405537605286, "rewards/Evidence_Num_Record/mean": 5.547619342803955, "rewards/Evidence_Num_Record/std": 2.318561315536499, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.5510229468345642, "rewards/VideoAccuracy/std": 0.4384976029396057, "step": 729, "train_speed(iter/s)": 0.43011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 636.0, "completions/mean_length": 472.0476379394531, "completions/min_length": 333.0, "entropy/max": 0.63671875, "entropy/mean": 0.46875, "entropy/min": 0.33984375, "epoch": 0.73, "grad_norm": 1.5204018132602868, "kl": 0.279296875, "learning_rate": 3.45139266054715e-07, "loss": 0.0028142035007476807, "memory(GiB)": 137.02, "reward": 1.6489835977554321, "reward_std": 0.31000250577926636, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3234373927116394, "rewards/EvidenceHallucination/std": 0.4225875735282898, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 0.813646674156189, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5176292061805725, "rewards/VideoAccuracy/std": 0.5845872163772583, "step": 730, "train_speed(iter/s)": 0.420384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 932.0, "completions/mean_length": 509.5952453613281, "completions/min_length": 318.0, "entropy/max": 0.91796875, "entropy/mean": 0.330078125, "entropy/min": 0.1328125, "epoch": 0.731, "grad_norm": 1.0430166734748847, "kl": 0.24609375, "learning_rate": 3.427443275920945e-07, "loss": 0.0025074242148548365, "memory(GiB)": 137.02, "reward": 2.30470871925354, "reward_std": 0.17227831482887268, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6398620009422302, "rewards/EvidenceHallucination/std": 0.4209192991256714, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9814980030059814, "rewards/VideoAccuracy/std": 0.3276780843734741, "step": 731, "train_speed(iter/s)": 0.407664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1714.0, "completions/mean_length": 607.1428833007812, "completions/min_length": 380.0, "entropy/max": 1.515625, "entropy/mean": 0.435546875, "entropy/min": 0.16796875, "epoch": 0.732, "grad_norm": 1.107850934047529, "kl": 0.23046875, "learning_rate": 3.403560076867985e-07, "loss": 0.002410092856734991, "memory(GiB)": 137.02, "reward": 1.703174114227295, "reward_std": 0.10910899937152863, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48142144083976746, "rewards/EvidenceHallucination/std": 0.45236778259277344, "rewards/Evidence_Num_Record/mean": 6.761904716491699, "rewards/Evidence_Num_Record/std": 3.8686327934265137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5783182978630066, "rewards/VideoAccuracy/std": 0.46562719345092773, "step": 732, "train_speed(iter/s)": 0.394389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 754.0, "completions/mean_length": 475.19049072265625, "completions/min_length": 308.0, "entropy/max": 0.55078125, "entropy/mean": 0.427734375, "entropy/min": 0.2734375, "epoch": 0.733, "grad_norm": 1.1707575932936314, "kl": 0.271484375, "learning_rate": 3.3797433038918145e-07, "loss": 0.0027360159438103437, "memory(GiB)": 137.02, "reward": 1.5253936052322388, "reward_std": 0.3052489757537842, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.30475470423698425, "rewards/EvidenceHallucination/std": 0.4269247353076935, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.3077540397644043, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.4120616614818573, "rewards/VideoAccuracy/std": 0.408565491437912, "step": 733, "train_speed(iter/s)": 0.378443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 596.0, "completions/mean_length": 456.71429443359375, "completions/min_length": 324.0, "entropy/max": 0.53125, "entropy/mean": 0.353515625, "entropy/min": 0.1826171875, "epoch": 0.734, "grad_norm": 1.0924188852331589, "kl": 0.27734375, "learning_rate": 3.355993196827075e-07, "loss": 0.002987553831189871, "memory(GiB)": 137.02, "reward": 2.08844256401062, "reward_std": 0.16964636743068695, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6116736531257629, "rewards/EvidenceHallucination/std": 0.4442037343978882, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 0.9472129940986633, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.8042028546333313, "rewards/VideoAccuracy/std": 0.51776522397995, "step": 734, "train_speed(iter/s)": 0.369455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/mean_length": 596.3333740234375, "completions/min_length": 334.0, "entropy/max": 0.76171875, "entropy/mean": 0.40625, "entropy/min": 0.16796875, "epoch": 0.735, "grad_norm": 1.0015811368115848, "kl": 0.2353515625, "learning_rate": 3.332309994837085e-07, "loss": 0.002433416899293661, "memory(GiB)": 137.02, "reward": 1.982049584388733, "reward_std": 0.2236880511045456, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.604497492313385, "rewards/EvidenceHallucination/std": 0.39630842208862305, "rewards/Evidence_Num_Record/mean": 5.738095283508301, "rewards/Evidence_Num_Record/std": 3.246457815170288, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7944831848144531, "rewards/VideoAccuracy/std": 0.5202955007553101, "step": 735, "train_speed(iter/s)": 0.357714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 876.0, "completions/mean_length": 517.1904907226562, "completions/min_length": 318.0, "entropy/max": 0.61328125, "entropy/mean": 0.40625, "entropy/min": 0.2216796875, "epoch": 0.736, "grad_norm": 1.324356052193191, "kl": 0.255859375, "learning_rate": 3.308693936411421e-07, "loss": 0.0025905664078891277, "memory(GiB)": 137.02, "reward": 1.7714544534683228, "reward_std": 0.3001267910003662, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5094105005264282, "rewards/EvidenceHallucination/std": 0.4729577600955963, "rewards/Evidence_Num_Record/mean": 4.785714149475098, "rewards/Evidence_Num_Record/std": 1.6897931098937988, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5743343830108643, "rewards/VideoAccuracy/std": 0.35331177711486816, "step": 736, "train_speed(iter/s)": 0.351023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 730.0, "completions/mean_length": 462.71429443359375, "completions/min_length": 294.0, "entropy/max": 0.55859375, "entropy/mean": 0.40234375, "entropy/min": 0.25, "epoch": 0.737, "grad_norm": 1.2447147031615817, "kl": 0.287109375, "learning_rate": 3.2851452593635265e-07, "loss": 0.0029010369908064604, "memory(GiB)": 137.02, "reward": 1.7444754838943481, "reward_std": 0.20037083327770233, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3784564733505249, "rewards/EvidenceHallucination/std": 0.4463752210140228, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.0109734535217285, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.5687841176986694, "rewards/VideoAccuracy/std": 0.6110247373580933, "step": 737, "train_speed(iter/s)": 0.344746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 895.0, "completions/mean_length": 579.8809814453125, "completions/min_length": 426.0, "entropy/max": 1.5546875, "entropy/mean": 0.41015625, "entropy/min": 0.1630859375, "epoch": 0.738, "grad_norm": 0.9076314424297449, "kl": 0.2080078125, "learning_rate": 3.2616642008283214e-07, "loss": 0.0021310984157025814, "memory(GiB)": 137.02, "reward": 1.9944345951080322, "reward_std": 0.19511547684669495, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24974822998046875, "rewards/EvidenceHallucination/std": 0.35780394077301025, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.9660297632217407, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8111514449119568, "rewards/VideoAccuracy/std": 0.44406232237815857, "step": 738, "train_speed(iter/s)": 0.332188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/mean_length": 547.2619018554688, "completions/min_length": 340.0, "entropy/max": 0.59765625, "entropy/mean": 0.458984375, "entropy/min": 0.2451171875, "epoch": 0.739, "grad_norm": 1.0060410760992153, "kl": 0.2392578125, "learning_rate": 3.2382509972598084e-07, "loss": 0.0024247546680271626, "memory(GiB)": 137.02, "reward": 1.5274099111557007, "reward_std": 0.19877104461193085, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37413010001182556, "rewards/EvidenceHallucination/std": 0.4627932608127594, "rewards/Evidence_Num_Record/mean": 5.833333492279053, "rewards/Evidence_Num_Record/std": 1.7096093893051147, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.39067915081977844, "rewards/VideoAccuracy/std": 0.41949576139450073, "step": 739, "train_speed(iter/s)": 0.327163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 800.0, "completions/mean_length": 481.26190185546875, "completions/min_length": 314.0, "entropy/max": 0.57421875, "entropy/mean": 0.419921875, "entropy/min": 0.2734375, "epoch": 0.74, "grad_norm": 1.2859510032541313, "kl": 0.279296875, "learning_rate": 3.214905884428679e-07, "loss": 0.002799172420054674, "memory(GiB)": 137.02, "reward": 1.4543876647949219, "reward_std": 0.15212209522724152, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20015443861484528, "rewards/EvidenceHallucination/std": 0.3680866062641144, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.6562975645065308, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.35721397399902344, "rewards/VideoAccuracy/std": 0.4436786472797394, "step": 740, "train_speed(iter/s)": 0.322398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/mean_length": 587.452392578125, "completions/min_length": 350.0, "entropy/max": 0.5390625, "entropy/mean": 0.302734375, "entropy/min": 0.10107421875, "epoch": 0.741, "grad_norm": 0.8845740254085731, "kl": 0.2119140625, "learning_rate": 3.1916290974199655e-07, "loss": 0.002165029523894191, "memory(GiB)": 137.02, "reward": 2.206284999847412, "reward_std": 0.12530550360679626, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6892232298851013, "rewards/EvidenceHallucination/std": 0.380018025636673, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.6251507997512817, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.8779641389846802, "rewards/VideoAccuracy/std": 0.3144403398036957, "step": 741, "train_speed(iter/s)": 0.313079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/mean_length": 541.357177734375, "completions/min_length": 277.0, "entropy/max": 1.203125, "entropy/mean": 0.53515625, "entropy/min": 0.3125, "epoch": 0.742, "grad_norm": 1.1111498340287014, "kl": 0.26171875, "learning_rate": 3.168420870630657e-07, "loss": 0.002663705265149474, "memory(GiB)": 137.02, "reward": 1.623490810394287, "reward_std": 0.1593041568994522, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4474904239177704, "rewards/EvidenceHallucination/std": 0.44139039516448975, "rewards/Evidence_Num_Record/mean": 5.857142925262451, "rewards/Evidence_Num_Record/std": 2.415109157562256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.095238097012043, "rewards/HonestTime/std": 0.2971017360687256, "rewards/VideoAccuracy/mean": 0.5149451494216919, "rewards/VideoAccuracy/std": 0.4730999767780304, "step": 742, "train_speed(iter/s)": 0.307681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 676.0, "completions/mean_length": 465.8809509277344, "completions/min_length": 292.0, "entropy/max": 0.625, "entropy/mean": 0.4375, "entropy/min": 0.30078125, "epoch": 0.743, "grad_norm": 1.224308382413667, "kl": 0.2734375, "learning_rate": 3.1452814377673343e-07, "loss": 0.002747116144746542, "memory(GiB)": 137.02, "reward": 1.530137538909912, "reward_std": 0.22144876420497894, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3962409794330597, "rewards/EvidenceHallucination/std": 0.46595442295074463, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.2204699516296387, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.3985084295272827, "rewards/VideoAccuracy/std": 0.41005414724349976, "step": 743, "train_speed(iter/s)": 0.302552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 967.0, "completions/mean_length": 544.452392578125, "completions/min_length": 392.0, "entropy/max": 0.51171875, "entropy/mean": 0.326171875, "entropy/min": 0.10693359375, "epoch": 0.744, "grad_norm": 1.166318698999342, "kl": 0.2490234375, "learning_rate": 3.12221103184383e-07, "loss": 0.0025127048138529062, "memory(GiB)": 137.02, "reward": 1.797698974609375, "reward_std": 0.151752769947052, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3840665817260742, "rewards/EvidenceHallucination/std": 0.47486865520477295, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.0280616283416748, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.5637427568435669, "rewards/VideoAccuracy/std": 0.4568615257740021, "step": 744, "train_speed(iter/s)": 0.296248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 766.0, "completions/mean_length": 544.6428833007812, "completions/min_length": 320.0, "entropy/max": 1.3203125, "entropy/mean": 0.455078125, "entropy/min": 0.1494140625, "epoch": 0.745, "grad_norm": 0.9852337804421616, "kl": 0.2412109375, "learning_rate": 3.0992098851788817e-07, "loss": 0.0024463534355163574, "memory(GiB)": 137.02, "reward": 1.7108333110809326, "reward_std": 0.027525782585144043, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41925325989723206, "rewards/EvidenceHallucination/std": 0.4327711760997772, "rewards/Evidence_Num_Record/mean": 5.142857074737549, "rewards/Evidence_Num_Record/std": 1.7884647846221924, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5650777220726013, "rewards/VideoAccuracy/std": 0.505497932434082, "step": 745, "train_speed(iter/s)": 0.290104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 704.0, "completions/mean_length": 492.26190185546875, "completions/min_length": 247.0, "entropy/max": 0.64453125, "entropy/mean": 0.45703125, "entropy/min": 0.287109375, "epoch": 0.746, "grad_norm": 1.3799214356283465, "kl": 0.259765625, "learning_rate": 3.0762782293937727e-07, "loss": 0.002617140766233206, "memory(GiB)": 137.02, "reward": 1.6975003480911255, "reward_std": 0.14715532958507538, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4007459878921509, "rewards/EvidenceHallucination/std": 0.45319873094558716, "rewards/Evidence_Num_Record/mean": 5.095238208770752, "rewards/Evidence_Num_Record/std": 1.4618651866912842, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.5268748998641968, "rewards/VideoAccuracy/std": 0.39935460686683655, "step": 746, "train_speed(iter/s)": 0.284507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/mean_length": 478.21429443359375, "completions/min_length": 327.0, "entropy/max": 0.671875, "entropy/mean": 0.41796875, "entropy/min": 0.296875, "epoch": 0.747, "grad_norm": 1.0914875373782056, "kl": 0.2890625, "learning_rate": 3.0534162954100263e-07, "loss": 0.0029141679406166077, "memory(GiB)": 137.02, "reward": 1.831376552581787, "reward_std": 0.20554934442043304, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3792650103569031, "rewards/EvidenceHallucination/std": 0.431448757648468, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.6602855324745178, "rewards/VideoAccuracy/std": 0.6216288208961487, "step": 747, "train_speed(iter/s)": 0.280518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 751.0, "completions/mean_length": 550.2142944335938, "completions/min_length": 373.0, "entropy/max": 1.1484375, "entropy/mean": 0.33203125, "entropy/min": 0.138671875, "epoch": 0.748, "grad_norm": 1.0007753875117082, "kl": 0.2119140625, "learning_rate": 3.0306243134470667e-07, "loss": 0.0021712270099669695, "memory(GiB)": 137.02, "reward": 2.0038554668426514, "reward_std": 0.2609616219997406, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4727940559387207, "rewards/EvidenceHallucination/std": 0.4271722733974457, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.4989348649978638, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7759631872177124, "rewards/VideoAccuracy/std": 0.471916139125824, "step": 748, "train_speed(iter/s)": 0.275697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2212.0, "completions/mean_length": 574.6904907226562, "completions/min_length": 384.0, "entropy/max": 1.0625, "entropy/mean": 0.43359375, "entropy/min": 0.1591796875, "epoch": 0.749, "grad_norm": 1.342246423324975, "kl": 0.2333984375, "learning_rate": 3.007902513019893e-07, "loss": 0.0023996024392545223, "memory(GiB)": 137.02, "reward": 1.821750521659851, "reward_std": 0.22269243001937866, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5023324489593506, "rewards/EvidenceHallucination/std": 0.43607866764068604, "rewards/Evidence_Num_Record/mean": 6.309524059295654, "rewards/Evidence_Num_Record/std": 4.181772708892822, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.6593791246414185, "rewards/VideoAccuracy/std": 0.38703349232673645, "step": 749, "train_speed(iter/s)": 0.27039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 759.0, "completions/mean_length": 486.1190490722656, "completions/min_length": 329.0, "entropy/max": 0.66796875, "entropy/mean": 0.4296875, "entropy/min": 0.259765625, "epoch": 0.75, "grad_norm": 1.206001882760331, "kl": 0.267578125, "learning_rate": 2.985251122936786e-07, "loss": 0.0027073421515524387, "memory(GiB)": 137.04, "reward": 1.4515140056610107, "reward_std": 0.20704594254493713, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27075642347335815, "rewards/EvidenceHallucination/std": 0.4357222020626068, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.1160845756530762, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.30688655376434326, "rewards/VideoAccuracy/std": 0.3046022355556488, "step": 750, "train_speed(iter/s)": 0.265822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1191.0, "completions/mean_length": 571.4761962890625, "completions/min_length": 343.0, "entropy/max": 0.58203125, "entropy/mean": 0.287109375, "entropy/min": 0.0927734375, "epoch": 0.751, "grad_norm": 0.9979638657062981, "kl": 0.2314453125, "learning_rate": 2.962670371296996e-07, "loss": 0.0023694182746112347, "memory(GiB)": 137.04, "reward": 1.9292515516281128, "reward_std": 0.17530755698680878, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.392671138048172, "rewards/EvidenceHallucination/std": 0.4279833436012268, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.632993221282959, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6507173776626587, "rewards/VideoAccuracy/std": 0.42830172181129456, "step": 751, "train_speed(iter/s)": 0.261773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2198.0, "completions/mean_length": 662.5952758789062, "completions/min_length": 377.0, "entropy/max": 0.66796875, "entropy/mean": 0.42578125, "entropy/min": 0.1962890625, "epoch": 0.752, "grad_norm": 1.176046473783173, "kl": 0.2275390625, "learning_rate": 2.9401604854884357e-07, "loss": 0.002370295813307166, "memory(GiB)": 137.04, "reward": 1.7728904485702515, "reward_std": 0.13586002588272095, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49897584319114685, "rewards/EvidenceHallucination/std": 0.41422632336616516, "rewards/Evidence_Num_Record/mean": 7.238095283508301, "rewards/Evidence_Num_Record/std": 6.495061874389648, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6397619247436523, "rewards/VideoAccuracy/std": 0.4417704939842224, "step": 752, "train_speed(iter/s)": 0.257461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 689.0, "completions/mean_length": 462.16668701171875, "completions/min_length": 303.0, "entropy/max": 0.58203125, "entropy/mean": 0.44140625, "entropy/min": 0.279296875, "epoch": 0.753, "grad_norm": 1.4758477793811793, "kl": 0.287109375, "learning_rate": 2.9177216921854096e-07, "loss": 0.0028979978524148464, "memory(GiB)": 137.04, "reward": 1.8449641466140747, "reward_std": 0.3156450688838959, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5636225342750549, "rewards/EvidenceHallucination/std": 0.43338704109191895, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.3477731943130493, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.6703349351882935, "rewards/VideoAccuracy/std": 0.350552499294281, "step": 753, "train_speed(iter/s)": 0.254208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 915.0, "completions/mean_length": 567.8333740234375, "completions/min_length": 340.0, "entropy/max": 0.61328125, "entropy/mean": 0.34765625, "entropy/min": 0.16796875, "epoch": 0.754, "grad_norm": 1.0844028372997443, "kl": 0.2470703125, "learning_rate": 2.895354217346313e-07, "loss": 0.002512303413823247, "memory(GiB)": 137.04, "reward": 2.207542896270752, "reward_std": 0.20663592219352722, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7446616291999817, "rewards/EvidenceHallucination/std": 0.3785170614719391, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 0.9947597980499268, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8919438123703003, "rewards/VideoAccuracy/std": 0.4285814166069031, "step": 754, "train_speed(iter/s)": 0.250293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/mean_length": 558.40478515625, "completions/min_length": 346.0, "entropy/max": 0.828125, "entropy/mean": 0.443359375, "entropy/min": 0.1435546875, "epoch": 0.755, "grad_norm": 1.21396233764057, "kl": 0.234375, "learning_rate": 2.873058286211374e-07, "loss": 0.0024085480254143476, "memory(GiB)": 137.04, "reward": 1.996396780014038, "reward_std": 0.046993501484394073, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6595722436904907, "rewards/EvidenceHallucination/std": 0.3664809465408325, "rewards/Evidence_Num_Record/mean": 5.61904764175415, "rewards/Evidence_Num_Record/std": 2.9545531272888184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.802577555179596, "rewards/VideoAccuracy/std": 0.37356090545654297, "step": 755, "train_speed(iter/s)": 0.242949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 829.0, "completions/mean_length": 498.5476379394531, "completions/min_length": 271.0, "entropy/max": 0.62890625, "entropy/mean": 0.447265625, "entropy/min": 0.3125, "epoch": 0.756, "grad_norm": 1.2881935895616428, "kl": 0.26171875, "learning_rate": 2.8508341233003654e-07, "loss": 0.002634369535371661, "memory(GiB)": 137.04, "reward": 1.6359120607376099, "reward_std": 0.24966934323310852, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39463216066360474, "rewards/EvidenceHallucination/std": 0.4662226140499115, "rewards/Evidence_Num_Record/mean": 4.809524059295654, "rewards/Evidence_Num_Record/std": 1.6265795230865479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.46174752712249756, "rewards/VideoAccuracy/std": 0.3875961899757385, "step": 756, "train_speed(iter/s)": 0.240242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/mean_length": 454.16668701171875, "completions/min_length": 287.0, "entropy/max": 0.8828125, "entropy/mean": 0.43359375, "entropy/min": 0.298828125, "epoch": 0.757, "grad_norm": 1.2744736247170079, "kl": 0.291015625, "learning_rate": 2.828681952410366e-07, "loss": 0.0029427676927298307, "memory(GiB)": 137.04, "reward": 1.8057868480682373, "reward_std": 0.22455629706382751, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33172181248664856, "rewards/EvidenceHallucination/std": 0.4148164391517639, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.0406935214996338, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6394423842430115, "rewards/VideoAccuracy/std": 0.7114621996879578, "step": 757, "train_speed(iter/s)": 0.237344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 844.0, "completions/mean_length": 599.7619018554688, "completions/min_length": 392.0, "entropy/max": 0.81640625, "entropy/mean": 0.349609375, "entropy/min": 0.154296875, "epoch": 0.758, "grad_norm": 0.9210292766563974, "kl": 0.2138671875, "learning_rate": 2.8066019966134904e-07, "loss": 0.0021720444783568382, "memory(GiB)": 137.04, "reward": 1.9884105920791626, "reward_std": 0.15015339851379395, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.542344868183136, "rewards/EvidenceHallucination/std": 0.41900986433029175, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 2.176849842071533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7466084361076355, "rewards/VideoAccuracy/std": 0.5317025780677795, "step": 758, "train_speed(iter/s)": 0.232996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 639.0, "completions/mean_length": 489.0, "completions/min_length": 286.0, "entropy/max": 0.66015625, "entropy/mean": 0.447265625, "entropy/min": 0.298828125, "epoch": 0.759, "grad_norm": 1.3120394752787807, "kl": 0.283203125, "learning_rate": 2.784594478254645e-07, "loss": 0.0028628166764974594, "memory(GiB)": 137.04, "reward": 1.803435206413269, "reward_std": 0.11848029494285583, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5675522685050964, "rewards/EvidenceHallucination/std": 0.45467978715896606, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 1.3251782655715942, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.604210615158081, "rewards/VideoAccuracy/std": 0.36147215962409973, "step": 759, "train_speed(iter/s)": 0.230587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 826.0, "completions/mean_length": 479.1428527832031, "completions/min_length": 355.0, "entropy/max": 0.6171875, "entropy/mean": 0.455078125, "entropy/min": 0.330078125, "epoch": 0.76, "grad_norm": 1.0488478775950267, "kl": 0.279296875, "learning_rate": 2.762659618949298e-07, "loss": 0.002819633577018976, "memory(GiB)": 137.04, "reward": 1.571948528289795, "reward_std": 0.17096464335918427, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3613227605819702, "rewards/EvidenceHallucination/std": 0.4511774778366089, "rewards/Evidence_Num_Record/mean": 4.5, "rewards/Evidence_Num_Record/std": 1.329771637916565, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.4330170452594757, "rewards/VideoAccuracy/std": 0.5008947253227234, "step": 760, "train_speed(iter/s)": 0.227529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 696.0, "completions/mean_length": 536.5714111328125, "completions/min_length": 387.0, "entropy/max": 0.62890625, "entropy/mean": 0.283203125, "entropy/min": 0.1103515625, "epoch": 0.761, "grad_norm": 1.0830395809094013, "kl": 0.2314453125, "learning_rate": 2.7407976395812414e-07, "loss": 0.0023674783296883106, "memory(GiB)": 137.04, "reward": 2.1427016258239746, "reward_std": 0.147226020693779, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5402226448059082, "rewards/EvidenceHallucination/std": 0.4585988521575928, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 1.1323559284210205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8394187688827515, "rewards/VideoAccuracy/std": 0.40415194630622864, "step": 761, "train_speed(iter/s)": 0.224616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 924.0, "completions/mean_length": 538.2619018554688, "completions/min_length": 219.0, "entropy/max": 1.2578125, "entropy/mean": 0.53125, "entropy/min": 0.283203125, "epoch": 0.762, "grad_norm": 1.3926846899697776, "kl": 0.255859375, "learning_rate": 2.719008760300359e-07, "loss": 0.002642326056957245, "memory(GiB)": 137.04, "reward": 2.0062599182128906, "reward_std": 0.2314552515745163, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.731963574886322, "rewards/EvidenceHallucination/std": 0.3094104826450348, "rewards/Evidence_Num_Record/mean": 5.38095235824585, "rewards/Evidence_Num_Record/std": 1.974871277809143, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.7979623079299927, "rewards/VideoAccuracy/std": 0.3034944236278534, "step": 762, "train_speed(iter/s)": 0.221321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 872.0, "completions/mean_length": 538.7380981445312, "completions/min_length": 352.0, "entropy/max": 0.5703125, "entropy/mean": 0.427734375, "entropy/min": 0.2265625, "epoch": 0.763, "grad_norm": 1.3743981821647915, "kl": 0.27734375, "learning_rate": 2.6972932005204265e-07, "loss": 0.0028074365109205246, "memory(GiB)": 137.04, "reward": 1.5624735355377197, "reward_std": 0.27830272912979126, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3662137985229492, "rewards/EvidenceHallucination/std": 0.43138769268989563, "rewards/Evidence_Num_Record/mean": 5.047619342803955, "rewards/Evidence_Num_Record/std": 1.360637903213501, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.4273257553577423, "rewards/VideoAccuracy/std": 0.41626739501953125, "step": 763, "train_speed(iter/s)": 0.218726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1159.0, "completions/mean_length": 576.5, "completions/min_length": 307.0, "entropy/max": 0.53125, "entropy/mean": 0.365234375, "entropy/min": 0.17578125, "epoch": 0.764, "grad_norm": 1.1541825979084532, "kl": 0.2431640625, "learning_rate": 2.6756511789168924e-07, "loss": 0.0024590210523456335, "memory(GiB)": 137.04, "reward": 2.1274592876434326, "reward_std": 0.2759854793548584, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4846096634864807, "rewards/EvidenceHallucination/std": 0.412736177444458, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.1189426183700562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.8686324954032898, "rewards/VideoAccuracy/std": 0.41240498423576355, "step": 764, "train_speed(iter/s)": 0.215251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1158.0, "completions/mean_length": 620.5714111328125, "completions/min_length": 312.0, "entropy/max": 0.68359375, "entropy/mean": 0.40234375, "entropy/min": 0.126953125, "epoch": 0.765, "grad_norm": 1.2693623528387978, "kl": 0.2197265625, "learning_rate": 2.654082913424668e-07, "loss": 0.002237423788756132, "memory(GiB)": 137.04, "reward": 2.0295865535736084, "reward_std": 0.2693243622779846, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.497098833322525, "rewards/EvidenceHallucination/std": 0.4144033193588257, "rewards/Evidence_Num_Record/mean": 5.6666669845581055, "rewards/Evidence_Num_Record/std": 2.3443410396575928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.839690625667572, "rewards/VideoAccuracy/std": 0.37392786145210266, "step": 765, "train_speed(iter/s)": 0.212781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 659.0, "completions/mean_length": 502.26190185546875, "completions/min_length": 340.0, "entropy/max": 0.7265625, "entropy/mean": 0.42578125, "entropy/min": 0.28125, "epoch": 0.766, "grad_norm": 1.1298495045639512, "kl": 0.259765625, "learning_rate": 2.6325886212359496e-07, "loss": 0.002813272178173065, "memory(GiB)": 137.04, "reward": 1.403793215751648, "reward_std": 0.2295052856206894, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.14142382144927979, "rewards/EvidenceHallucination/std": 0.32684361934661865, "rewards/Evidence_Num_Record/mean": 4.6666669845581055, "rewards/Evidence_Num_Record/std": 0.9794639348983765, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.24217505753040314, "rewards/VideoAccuracy/std": 0.3239958584308624, "step": 766, "train_speed(iter/s)": 0.211075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 713.0, "completions/mean_length": 469.8809509277344, "completions/min_length": 286.0, "entropy/max": 0.7421875, "entropy/mean": 0.427734375, "entropy/min": 0.310546875, "epoch": 0.767, "grad_norm": 0.9006828184671115, "kl": 0.279296875, "learning_rate": 2.611168518798026e-07, "loss": 0.0030206870287656784, "memory(GiB)": 137.04, "reward": 1.644193172454834, "reward_std": 0.11802645772695541, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3115190863609314, "rewards/EvidenceHallucination/std": 0.42774006724357605, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.0548268556594849, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.48188936710357666, "rewards/VideoAccuracy/std": 0.6383110284805298, "step": 767, "train_speed(iter/s)": 0.208967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 973.0, "completions/mean_length": 651.3809814453125, "completions/min_length": 319.0, "entropy/max": 2.34375, "entropy/mean": 0.337890625, "entropy/min": 0.1279296875, "epoch": 0.768, "grad_norm": 1.0636513414616937, "kl": 0.1962890625, "learning_rate": 2.5898228218110827e-07, "loss": 0.0020056653302162886, "memory(GiB)": 137.04, "reward": 2.059290647506714, "reward_std": 0.394561767578125, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6037928462028503, "rewards/EvidenceHallucination/std": 0.4175638258457184, "rewards/Evidence_Num_Record/mean": 4.738095283508301, "rewards/Evidence_Num_Record/std": 1.6536659002304077, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.805198609828949, "rewards/VideoAccuracy/std": 0.457864910364151, "step": 768, "train_speed(iter/s)": 0.206188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/mean_length": 526.547607421875, "completions/min_length": 320.0, "entropy/max": 0.6484375, "entropy/mean": 0.4453125, "entropy/min": 0.2099609375, "epoch": 0.769, "grad_norm": 1.2295031926056552, "kl": 0.24609375, "learning_rate": 2.568551745226056e-07, "loss": 0.00250989873893559, "memory(GiB)": 137.04, "reward": 1.644586443901062, "reward_std": 0.09407269209623337, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4121597111225128, "rewards/EvidenceHallucination/std": 0.47044476866722107, "rewards/Evidence_Num_Record/mean": 4.88095235824585, "rewards/Evidence_Num_Record/std": 2.12077260017395, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.462154358625412, "rewards/VideoAccuracy/std": 0.3949291408061981, "step": 769, "train_speed(iter/s)": 0.203477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 744.0, "completions/mean_length": 463.4761962890625, "completions/min_length": 306.0, "entropy/max": 0.89453125, "entropy/mean": 0.435546875, "entropy/min": 0.291015625, "epoch": 0.77, "grad_norm": 0.932887645967211, "kl": 0.28515625, "learning_rate": 2.5473555032424534e-07, "loss": 0.0028784458991140127, "memory(GiB)": 137.04, "reward": 1.218045711517334, "reward_std": 0.11779654026031494, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.05753537639975548, "rewards/EvidenceHallucination/std": 0.2104816436767578, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.1736558675765991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.1398720145225525, "rewards/VideoAccuracy/std": 0.22162491083145142, "step": 770, "train_speed(iter/s)": 0.201377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/mean_length": 575.4761962890625, "completions/min_length": 361.0, "entropy/max": 0.5234375, "entropy/mean": 0.33203125, "entropy/min": 0.1513671875, "epoch": 0.771, "grad_norm": 1.0178695989855275, "kl": 0.21875, "learning_rate": 2.526234309306193e-07, "loss": 0.0022084922529757023, "memory(GiB)": 137.04, "reward": 1.9799977540969849, "reward_std": 0.23305070400238037, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5267239809036255, "rewards/EvidenceHallucination/std": 0.4495556652545929, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.1768676042556763, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430335700511932, "rewards/VideoAccuracy/mean": 0.6794148683547974, "rewards/VideoAccuracy/std": 0.38316991925239563, "step": 771, "train_speed(iter/s)": 0.198893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/mean_length": 572.40478515625, "completions/min_length": 392.0, "entropy/max": 0.5859375, "entropy/mean": 0.3984375, "entropy/min": 0.2294921875, "epoch": 0.772, "grad_norm": 1.2712903116725374, "kl": 0.265625, "learning_rate": 2.505188376107461e-07, "loss": 0.0026968661695718765, "memory(GiB)": 137.04, "reward": 1.6829097270965576, "reward_std": 0.31071701645851135, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4338894784450531, "rewards/EvidenceHallucination/std": 0.46595823764801025, "rewards/Evidence_Num_Record/mean": 5.690476417541504, "rewards/Evidence_Num_Record/std": 2.4741404056549072, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5294651389122009, "rewards/VideoAccuracy/std": 0.47676700353622437, "step": 772, "train_speed(iter/s)": 0.196044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/mean_length": 444.23809814453125, "completions/min_length": 234.0, "entropy/max": 0.6328125, "entropy/mean": 0.41796875, "entropy/min": 0.267578125, "epoch": 0.773, "grad_norm": 1.075729972846353, "kl": 0.2734375, "learning_rate": 2.4842179155785736e-07, "loss": 0.002762245712801814, "memory(GiB)": 137.04, "reward": 1.5281906127929688, "reward_std": 0.19979238510131836, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3526979386806488, "rewards/EvidenceHallucination/std": 0.46093907952308655, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.2259297370910645, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.40527021884918213, "rewards/VideoAccuracy/std": 0.40325766801834106, "step": 773, "train_speed(iter/s)": 0.193821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 601.0, "completions/mean_length": 466.4761962890625, "completions/min_length": 323.0, "entropy/max": 0.5390625, "entropy/mean": 0.3671875, "entropy/min": 0.1689453125, "epoch": 0.774, "grad_norm": 1.0876326363448923, "kl": 0.267578125, "learning_rate": 2.463323138891837e-07, "loss": 0.0026955704670399427, "memory(GiB)": 137.04, "reward": 1.9218472242355347, "reward_std": 0.24783822894096375, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4492851197719574, "rewards/EvidenceHallucination/std": 0.43904009461402893, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.1384934186935425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.6700853109359741, "rewards/VideoAccuracy/std": 0.5515148639678955, "step": 774, "train_speed(iter/s)": 0.192153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/mean_length": 622.6428833007812, "completions/min_length": 437.0, "entropy/max": 1.359375, "entropy/mean": 0.375, "entropy/min": 0.12060546875, "epoch": 0.775, "grad_norm": 0.9770357562197043, "kl": 0.2216796875, "learning_rate": 2.4425042564574185e-07, "loss": 0.002256374340504408, "memory(GiB)": 137.04, "reward": 1.7947856187820435, "reward_std": 0.21613454818725586, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.419340044260025, "rewards/EvidenceHallucination/std": 0.41037309169769287, "rewards/Evidence_Num_Record/mean": 6.095238208770752, "rewards/Evidence_Num_Record/std": 2.953373670578003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.6109175682067871, "rewards/VideoAccuracy/std": 0.46995970606803894, "step": 775, "train_speed(iter/s)": 0.189629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 732.0, "completions/mean_length": 501.71429443359375, "completions/min_length": 281.0, "entropy/max": 0.65234375, "entropy/mean": 0.435546875, "entropy/min": 0.2119140625, "epoch": 0.776, "grad_norm": 1.3481529811925332, "kl": 0.265625, "learning_rate": 2.4217614779212315e-07, "loss": 0.0026915818452835083, "memory(GiB)": 137.04, "reward": 1.8733131885528564, "reward_std": 0.23146864771842957, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6218430399894714, "rewards/EvidenceHallucination/std": 0.44950273633003235, "rewards/Evidence_Num_Record/mean": 4.952381134033203, "rewards/Evidence_Num_Record/std": 1.3784470558166504, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.6584683060646057, "rewards/VideoAccuracy/std": 0.35335877537727356, "step": 776, "train_speed(iter/s)": 0.187409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 932.0, "completions/mean_length": 449.1190490722656, "completions/min_length": 219.0, "entropy/max": 0.6171875, "entropy/mean": 0.408203125, "entropy/min": 0.1875, "epoch": 0.777, "grad_norm": 1.3426598800181293, "kl": 0.294921875, "learning_rate": 2.4010950121628313e-07, "loss": 0.002975872717797756, "memory(GiB)": 137.04, "reward": 1.9005649089813232, "reward_std": 0.3881090581417084, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48913657665252686, "rewards/EvidenceHallucination/std": 0.4540313482284546, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.3651119470596313, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.70273756980896, "rewards/VideoAccuracy/std": 0.6419118642807007, "step": 777, "train_speed(iter/s)": 0.185488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 844.0, "completions/mean_length": 608.5, "completions/min_length": 324.0, "entropy/max": 1.8125, "entropy/mean": 0.37890625, "entropy/min": 0.119140625, "epoch": 0.778, "grad_norm": 1.122146108228793, "kl": 0.2001953125, "learning_rate": 2.3805050672932925e-07, "loss": 0.0020415023900568485, "memory(GiB)": 137.04, "reward": 2.269925594329834, "reward_std": 0.07296941429376602, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7881034016609192, "rewards/EvidenceHallucination/std": 0.22882558405399323, "rewards/Evidence_Num_Record/mean": 5.142857074737549, "rewards/Evidence_Num_Record/std": 2.6279122829437256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9789714813232422, "rewards/VideoAccuracy/std": 0.22200588881969452, "step": 778, "train_speed(iter/s)": 0.182988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 794.0, "completions/mean_length": 517.6428833007812, "completions/min_length": 331.0, "entropy/max": 0.69140625, "entropy/mean": 0.453125, "entropy/min": 0.322265625, "epoch": 0.779, "grad_norm": 1.2723766867809578, "kl": 0.251953125, "learning_rate": 2.3599918506531336e-07, "loss": 0.002551523270085454, "memory(GiB)": 137.04, "reward": 1.8165538311004639, "reward_std": 0.2667812705039978, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5226373076438904, "rewards/EvidenceHallucination/std": 0.4287099838256836, "rewards/Evidence_Num_Record/mean": 5.11904764175415, "rewards/Evidence_Num_Record/std": 1.6260437965393066, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.6644073128700256, "rewards/VideoAccuracy/std": 0.41102248430252075, "step": 779, "train_speed(iter/s)": 0.181485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 641.0, "completions/mean_length": 464.69049072265625, "completions/min_length": 331.0, "entropy/max": 0.81640625, "entropy/mean": 0.439453125, "entropy/min": 0.31640625, "epoch": 0.78, "grad_norm": 1.2835722921065864, "kl": 0.267578125, "learning_rate": 2.339555568810221e-07, "loss": 0.002697822405025363, "memory(GiB)": 137.04, "reward": 1.4789576530456543, "reward_std": 0.3967013955116272, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.24236686527729034, "rewards/EvidenceHallucination/std": 0.39292433857917786, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 0.8913052678108215, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.3638175427913666, "rewards/VideoAccuracy/std": 0.5002002120018005, "step": 780, "train_speed(iter/s)": 0.179814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1041.0, "completions/mean_length": 590.7857055664062, "completions/min_length": 388.0, "entropy/max": 0.62890625, "entropy/mean": 0.294921875, "entropy/min": 0.1171875, "epoch": 0.781, "grad_norm": 0.9951126593890884, "kl": 0.2041015625, "learning_rate": 2.3191964275576803e-07, "loss": 0.0020998376421630383, "memory(GiB)": 137.04, "reward": 2.2755444049835205, "reward_std": 0.16309919953346252, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.647555947303772, "rewards/EvidenceHallucination/std": 0.44328615069389343, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9750069975852966, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9507946968078613, "rewards/VideoAccuracy/std": 0.459786981344223, "step": 781, "train_speed(iter/s)": 0.177398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 774.0, "completions/mean_length": 501.76190185546875, "completions/min_length": 373.0, "entropy/max": 1.140625, "entropy/mean": 0.52734375, "entropy/min": 0.330078125, "epoch": 0.782, "grad_norm": 1.2211936154390168, "kl": 0.27734375, "learning_rate": 2.2989146319118425e-07, "loss": 0.0028013205155730247, "memory(GiB)": 137.04, "reward": 1.6683571338653564, "reward_std": 0.16860167682170868, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45266225934028625, "rewards/EvidenceHallucination/std": 0.4449842572212219, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.0392975807189941, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5111579895019531, "rewards/VideoAccuracy/std": 0.43782246112823486, "step": 782, "train_speed(iter/s)": 0.175883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 772.0, "completions/mean_length": 487.8333435058594, "completions/min_length": 303.0, "entropy/max": 0.640625, "entropy/mean": 0.439453125, "entropy/min": 0.263671875, "epoch": 0.783, "grad_norm": 1.3299203101555763, "kl": 0.267578125, "learning_rate": 2.2787103861101653e-07, "loss": 0.002707436680793762, "memory(GiB)": 137.04, "reward": 1.4210991859436035, "reward_std": 0.37828224897384644, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2970438599586487, "rewards/EvidenceHallucination/std": 0.4277191460132599, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.007521152496338, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.32835710048675537, "rewards/VideoAccuracy/std": 0.42821741104125977, "step": 783, "train_speed(iter/s)": 0.174104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1081.0, "completions/mean_length": 589.7142944335938, "completions/min_length": 344.0, "entropy/max": 0.70703125, "entropy/mean": 0.375, "entropy/min": 0.1533203125, "epoch": 0.784, "grad_norm": 1.3517030280414586, "kl": 0.234375, "learning_rate": 2.258583893609175e-07, "loss": 0.0023501552641391754, "memory(GiB)": 137.04, "reward": 1.8731383085250854, "reward_std": 0.27096807956695557, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5219821333885193, "rewards/EvidenceHallucination/std": 0.4470759928226471, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 0.8006965517997742, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.6068370342254639, "rewards/VideoAccuracy/std": 0.48197415471076965, "step": 784, "train_speed(iter/s)": 0.172356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 911.0, "completions/mean_length": 534.90478515625, "completions/min_length": 276.0, "entropy/max": 0.6875, "entropy/mean": 0.408203125, "entropy/min": 0.138671875, "epoch": 0.785, "grad_norm": 1.1364964716258021, "kl": 0.24609375, "learning_rate": 2.2385353570824305e-07, "loss": 0.002507873810827732, "memory(GiB)": 137.04, "reward": 1.8118720054626465, "reward_std": 0.15822087228298187, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4680928885936737, "rewards/EvidenceHallucination/std": 0.4309382140636444, "rewards/Evidence_Num_Record/mean": 4.5, "rewards/Evidence_Num_Record/std": 1.7976950407028198, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.6182535290718079, "rewards/VideoAccuracy/std": 0.44967034459114075, "step": 785, "train_speed(iter/s)": 0.170848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 706.0, "completions/mean_length": 454.8571472167969, "completions/min_length": 345.0, "entropy/max": 0.875, "entropy/mean": 0.453125, "entropy/min": 0.302734375, "epoch": 0.786, "grad_norm": 1.3404990771945693, "kl": 0.267578125, "learning_rate": 2.2185649784184747e-07, "loss": 0.002728839172050357, "memory(GiB)": 137.04, "reward": 1.5887411832809448, "reward_std": 0.15206003189086914, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4058372378349304, "rewards/EvidenceHallucination/std": 0.4760162830352783, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.292343258857727, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.4504307508468628, "rewards/VideoAccuracy/std": 0.46277645230293274, "step": 786, "train_speed(iter/s)": 0.169354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 681.0, "completions/mean_length": 466.3809509277344, "completions/min_length": 340.0, "entropy/max": 0.66015625, "entropy/mean": 0.42578125, "entropy/min": 0.224609375, "epoch": 0.787, "grad_norm": 1.1613612062790581, "kl": 0.271484375, "learning_rate": 2.1986729587187958e-07, "loss": 0.0027181445620954037, "memory(GiB)": 137.04, "reward": 1.823914647102356, "reward_std": 0.251088947057724, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4527112543582916, "rewards/EvidenceHallucination/std": 0.4648779332637787, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 0.9055256843566895, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.638134241104126, "rewards/VideoAccuracy/std": 0.6499177813529968, "step": 787, "train_speed(iter/s)": 0.168382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 907.0, "completions/mean_length": 557.0, "completions/min_length": 365.0, "entropy/max": 0.91796875, "entropy/mean": 0.34765625, "entropy/min": 0.11572265625, "epoch": 0.788, "grad_norm": 1.0403974702132741, "kl": 0.2236328125, "learning_rate": 2.1788594982958086e-07, "loss": 0.0023073283955454826, "memory(GiB)": 137.04, "reward": 2.395468235015869, "reward_std": 0.1430952548980713, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6607549786567688, "rewards/EvidenceHallucination/std": 0.40233632922172546, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 1.129983901977539, "rewards/VideoAccuracy/std": 0.23416288197040558, "step": 788, "train_speed(iter/s)": 0.166278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 762.0, "completions/mean_length": 502.0238037109375, "completions/min_length": 354.0, "entropy/max": 0.71875, "entropy/mean": 0.46875, "entropy/min": 0.310546875, "epoch": 0.789, "grad_norm": 1.0809163218849382, "kl": 0.26171875, "learning_rate": 2.1591247966708426e-07, "loss": 0.002648875815793872, "memory(GiB)": 137.04, "reward": 1.705361008644104, "reward_std": 0.22425346076488495, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5174151659011841, "rewards/EvidenceHallucination/std": 0.4102669358253479, "rewards/Evidence_Num_Record/mean": 4.690476417541504, "rewards/Evidence_Num_Record/std": 1.7736291885375977, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5352112054824829, "rewards/VideoAccuracy/std": 0.4187927544116974, "step": 789, "train_speed(iter/s)": 0.165003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 734.0, "completions/mean_length": 478.8095397949219, "completions/min_length": 337.0, "entropy/max": 0.65625, "entropy/mean": 0.439453125, "entropy/min": 0.3359375, "epoch": 0.79, "grad_norm": 1.199193262453143, "kl": 0.287109375, "learning_rate": 2.139469052572127e-07, "loss": 0.0028981564100831747, "memory(GiB)": 137.04, "reward": 1.6890815496444702, "reward_std": 0.20440547168254852, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44685637950897217, "rewards/EvidenceHallucination/std": 0.43943482637405396, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 1.1220521926879883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5330434441566467, "rewards/VideoAccuracy/std": 0.4751872420310974, "step": 790, "train_speed(iter/s)": 0.163633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 823.0, "completions/mean_length": 528.40478515625, "completions/min_length": 359.0, "entropy/max": 0.7265625, "entropy/mean": 0.30078125, "entropy/min": 0.1337890625, "epoch": 0.791, "grad_norm": 1.0779360237652633, "kl": 0.2275390625, "learning_rate": 2.1198924639327808e-07, "loss": 0.00232085888274014, "memory(GiB)": 137.04, "reward": 2.4378058910369873, "reward_std": 0.1298271268606186, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6932946443557739, "rewards/EvidenceHallucination/std": 0.3891344368457794, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.7344991564750671, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0991469621658325, "rewards/VideoAccuracy/std": 0.21373583376407623, "step": 791, "train_speed(iter/s)": 0.161713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1042.0, "completions/mean_length": 544.3095092773438, "completions/min_length": 349.0, "entropy/max": 0.93359375, "entropy/mean": 0.47265625, "entropy/min": 0.275390625, "epoch": 0.792, "grad_norm": 1.10974018722686, "kl": 0.255859375, "learning_rate": 2.1003952278888382e-07, "loss": 0.002613186603412032, "memory(GiB)": 137.04, "reward": 1.7953269481658936, "reward_std": 0.1835445612668991, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.56269371509552, "rewards/EvidenceHallucination/std": 0.4327901303768158, "rewards/Evidence_Num_Record/mean": 5.357142925262451, "rewards/Evidence_Num_Record/std": 2.022090435028076, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6161215901374817, "rewards/VideoAccuracy/std": 0.41941821575164795, "step": 792, "train_speed(iter/s)": 0.160502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 724.0, "completions/mean_length": 425.3809509277344, "completions/min_length": 282.0, "entropy/max": 0.60546875, "entropy/mean": 0.451171875, "entropy/min": 0.306640625, "epoch": 0.793, "grad_norm": 1.0212667730806406, "kl": 0.291015625, "learning_rate": 2.08097754077725e-07, "loss": 0.00294006010517478, "memory(GiB)": 137.04, "reward": 1.322772741317749, "reward_std": 0.10889124125242233, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.19463838636875153, "rewards/EvidenceHallucination/std": 0.37754857540130615, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.2010449171066284, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.26003551483154297, "rewards/VideoAccuracy/std": 0.3910863697528839, "step": 793, "train_speed(iter/s)": 0.158958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 962.0, "completions/mean_length": 566.357177734375, "completions/min_length": 351.0, "entropy/max": 0.56640625, "entropy/mean": 0.349609375, "entropy/min": 0.13671875, "epoch": 0.794, "grad_norm": 1.0006950576747762, "kl": 0.25, "learning_rate": 2.0616395981339073e-07, "loss": 0.002716131042689085, "memory(GiB)": 137.04, "reward": 1.928626537322998, "reward_std": 0.24823066592216492, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3988966643810272, "rewards/EvidenceHallucination/std": 0.4528437554836273, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 0.9016696214675903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6821804642677307, "rewards/VideoAccuracy/std": 0.5712240934371948, "step": 794, "train_speed(iter/s)": 0.157512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 883.0, "completions/mean_length": 594.1428833007812, "completions/min_length": 375.0, "entropy/max": 1.375, "entropy/mean": 0.44140625, "entropy/min": 0.1923828125, "epoch": 0.795, "grad_norm": 1.0493805498050126, "kl": 0.2158203125, "learning_rate": 2.042381594691678e-07, "loss": 0.00218667252920568, "memory(GiB)": 137.04, "reward": 2.0298008918762207, "reward_std": 0.08417399227619171, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5516841411590576, "rewards/EvidenceHallucination/std": 0.43587151169776917, "rewards/Evidence_Num_Record/mean": 4.88095235824585, "rewards/Evidence_Num_Record/std": 1.9025321006774902, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8194637298583984, "rewards/VideoAccuracy/std": 0.2984185516834259, "step": 795, "train_speed(iter/s)": 0.156147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 650.0, "completions/mean_length": 449.69049072265625, "completions/min_length": 300.0, "entropy/max": 0.734375, "entropy/mean": 0.4375, "entropy/min": 0.267578125, "epoch": 0.796, "grad_norm": 1.1736701624981345, "kl": 0.291015625, "learning_rate": 2.0232037243784472e-07, "loss": 0.0029487479478120804, "memory(GiB)": 137.04, "reward": 1.5112559795379639, "reward_std": 0.11511891335248947, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3990863263607025, "rewards/EvidenceHallucination/std": 0.4522339999675751, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.9358023405075073, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.39810535311698914, "rewards/VideoAccuracy/std": 0.3985377550125122, "step": 796, "train_speed(iter/s)": 0.154814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 595.0, "completions/mean_length": 446.90478515625, "completions/min_length": 339.0, "entropy/max": 0.5625, "entropy/mean": 0.41015625, "entropy/min": 0.30859375, "epoch": 0.797, "grad_norm": 1.2352760996533378, "kl": 0.306640625, "learning_rate": 2.0041061803151505e-07, "loss": 0.0030723009258508682, "memory(GiB)": 137.04, "reward": 2.0463526248931885, "reward_std": 0.21899209916591644, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5365279912948608, "rewards/EvidenceHallucination/std": 0.4506574273109436, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.8611501455307007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.8438087105751038, "rewards/VideoAccuracy/std": 0.6808693408966064, "step": 797, "train_speed(iter/s)": 0.153683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/mean_length": 594.5952758789062, "completions/min_length": 345.0, "entropy/max": 0.6796875, "entropy/mean": 0.291015625, "entropy/min": 0.1552734375, "epoch": 0.798, "grad_norm": 0.8299365566704086, "kl": 0.2119140625, "learning_rate": 1.985089154813846e-07, "loss": 0.002190345199778676, "memory(GiB)": 137.04, "reward": 1.9640860557556152, "reward_std": 0.22710314393043518, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5165387392044067, "rewards/EvidenceHallucination/std": 0.45965853333473206, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 2.264711856842041, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.7322068810462952, "rewards/VideoAccuracy/std": 0.5266538262367249, "step": 798, "train_speed(iter/s)": 0.152043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 839.0, "completions/mean_length": 482.5714416503906, "completions/min_length": 281.0, "entropy/max": 1.0234375, "entropy/mean": 0.462890625, "entropy/min": 0.263671875, "epoch": 0.799, "grad_norm": 1.3890991623959492, "kl": 0.267578125, "learning_rate": 1.9661528393757742e-07, "loss": 0.0027191739063709974, "memory(GiB)": 137.04, "reward": 1.8365747928619385, "reward_std": 0.30120939016342163, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6418147683143616, "rewards/EvidenceHallucination/std": 0.4065225124359131, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.2598047256469727, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6415451765060425, "rewards/VideoAccuracy/std": 0.4152042269706726, "step": 799, "train_speed(iter/s)": 0.15078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 797.0, "completions/mean_length": 475.1428527832031, "completions/min_length": 336.0, "entropy/max": 0.59375, "entropy/mean": 0.447265625, "entropy/min": 0.287109375, "epoch": 0.8, "grad_norm": 1.034196467948775, "kl": 0.283203125, "learning_rate": 1.9472974246894136e-07, "loss": 0.002842884510755539, "memory(GiB)": 137.04, "reward": 1.516119122505188, "reward_std": 0.16130468249320984, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29011085629463196, "rewards/EvidenceHallucination/std": 0.4407220482826233, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.6803189516067505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.42476359009742737, "rewards/VideoAccuracy/std": 0.5281235575675964, "step": 800, "train_speed(iter/s)": 0.148553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 936.0, "completions/mean_length": 558.3333740234375, "completions/min_length": 369.0, "entropy/max": 0.578125, "entropy/mean": 0.298828125, "entropy/min": 0.1337890625, "epoch": 0.801, "grad_norm": 1.0164229415578283, "kl": 0.2314453125, "learning_rate": 1.9285231006285853e-07, "loss": 0.0023471282329410315, "memory(GiB)": 137.04, "reward": 2.2122294902801514, "reward_std": 0.16193059086799622, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5297513008117676, "rewards/EvidenceHallucination/std": 0.40756309032440186, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.292343258857727, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9110410809516907, "rewards/VideoAccuracy/std": 0.4661368727684021, "step": 801, "train_speed(iter/s)": 0.146065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/mean_length": 506.69049072265625, "completions/min_length": 304.0, "entropy/max": 1.3828125, "entropy/mean": 0.51171875, "entropy/min": 0.275390625, "epoch": 0.802, "grad_norm": 1.0871509073724928, "kl": 0.267578125, "learning_rate": 1.9098300562505264e-07, "loss": 0.0027364350389689207, "memory(GiB)": 137.04, "reward": 1.5814732313156128, "reward_std": 0.09867032617330551, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37604111433029175, "rewards/EvidenceHallucination/std": 0.4459204375743866, "rewards/Evidence_Num_Record/mean": 5.1666669845581055, "rewards/Evidence_Num_Record/std": 1.5759884119033813, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.4491220712661743, "rewards/VideoAccuracy/std": 0.4343416392803192, "step": 802, "train_speed(iter/s)": 0.145075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 572.0, "completions/mean_length": 434.69049072265625, "completions/min_length": 246.0, "entropy/max": 0.55078125, "entropy/mean": 0.41015625, "entropy/min": 0.255859375, "epoch": 0.803, "grad_norm": 1.2846279119399278, "kl": 0.294921875, "learning_rate": 1.89121847979398e-07, "loss": 0.002950625028461218, "memory(GiB)": 137.04, "reward": 1.519728422164917, "reward_std": 0.18460945785045624, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3646612763404846, "rewards/EvidenceHallucination/std": 0.47237429022789, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.8873874545097351, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4467962384223938, "rewards/VideoAccuracy/std": 0.4420500099658966, "step": 803, "train_speed(iter/s)": 0.143886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/mean_length": 462.0714416503906, "completions/min_length": 382.0, "entropy/max": 0.53515625, "entropy/mean": 0.359375, "entropy/min": 0.1513671875, "epoch": 0.804, "grad_norm": 1.3904429529862117, "kl": 0.26171875, "learning_rate": 1.8726885586773211e-07, "loss": 0.0026322491466999054, "memory(GiB)": 137.04, "reward": 2.227130651473999, "reward_std": 0.16979330778121948, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6671773195266724, "rewards/EvidenceHallucination/std": 0.41588160395622253, "rewards/Evidence_Num_Record/mean": 3.5714285373687744, "rewards/Evidence_Num_Record/std": 0.8305994868278503, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9317903518676758, "rewards/VideoAccuracy/std": 0.3766253590583801, "step": 804, "train_speed(iter/s)": 0.142855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1068.0, "completions/mean_length": 504.40478515625, "completions/min_length": 308.0, "entropy/max": 2.453125, "entropy/mean": 0.50390625, "entropy/min": 0.1748046875, "epoch": 0.805, "grad_norm": 1.2911063113332604, "kl": 0.2451171875, "learning_rate": 1.8542404794966427e-07, "loss": 0.002523736096918583, "memory(GiB)": 137.04, "reward": 2.2841567993164062, "reward_std": 0.05647649988532066, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8305041790008545, "rewards/EvidenceHallucination/std": 0.14656081795692444, "rewards/Evidence_Num_Record/mean": 4.714285850524902, "rewards/Evidence_Num_Record/std": 2.1559414863586426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 1.022817850112915, "rewards/VideoAccuracy/std": 0.2014765441417694, "step": 805, "train_speed(iter/s)": 0.141462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 786.0, "completions/mean_length": 492.19049072265625, "completions/min_length": 349.0, "entropy/max": 0.68359375, "entropy/mean": 0.447265625, "entropy/min": 0.2412109375, "epoch": 0.806, "grad_norm": 1.0349966850865397, "kl": 0.263671875, "learning_rate": 1.8358744280239048e-07, "loss": 0.0030731498263776302, "memory(GiB)": 137.04, "reward": 1.6029757261276245, "reward_std": 0.09720693528652191, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37939584255218506, "rewards/EvidenceHallucination/std": 0.4710071086883545, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.1699390411376953, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.4651917517185211, "rewards/VideoAccuracy/std": 0.4290243685245514, "step": 806, "train_speed(iter/s)": 0.140522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/mean_length": 455.69049072265625, "completions/min_length": 287.0, "entropy/max": 0.6484375, "entropy/mean": 0.40234375, "entropy/min": 0.2392578125, "epoch": 0.807, "grad_norm": 1.2630643967050652, "kl": 0.279296875, "learning_rate": 1.8175905892050348e-07, "loss": 0.0028256457298994064, "memory(GiB)": 137.04, "reward": 2.0943901538848877, "reward_std": 0.21325063705444336, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6793032288551331, "rewards/EvidenceHallucination/std": 0.4282911419868469, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 1.1251531839370728, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.8632914423942566, "rewards/VideoAccuracy/std": 0.5636486411094666, "step": 807, "train_speed(iter/s)": 0.138582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1039.0, "completions/mean_length": 619.952392578125, "completions/min_length": 344.0, "entropy/max": 0.99609375, "entropy/mean": 0.369140625, "entropy/min": 0.1064453125, "epoch": 0.808, "grad_norm": 0.9905238036154161, "kl": 0.1982421875, "learning_rate": 1.7993891471580892e-07, "loss": 0.0020271213725209236, "memory(GiB)": 137.04, "reward": 1.9375890493392944, "reward_std": 0.13439425826072693, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5044342279434204, "rewards/EvidenceHallucination/std": 0.4065171182155609, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 1.2623374462127686, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7033689618110657, "rewards/VideoAccuracy/std": 0.3760945200920105, "step": 808, "train_speed(iter/s)": 0.137518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 975.0, "completions/mean_length": 511.9761962890625, "completions/min_length": 315.0, "entropy/max": 1.2265625, "entropy/mean": 0.42578125, "entropy/min": 0.2197265625, "epoch": 0.809, "grad_norm": 1.2915911693627269, "kl": 0.251953125, "learning_rate": 1.78127028517139e-07, "loss": 0.002545798197388649, "memory(GiB)": 137.04, "reward": 1.8296865224838257, "reward_std": 0.27932512760162354, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4577580392360687, "rewards/EvidenceHallucination/std": 0.4566575288772583, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.060591697692871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6714683175086975, "rewards/VideoAccuracy/std": 0.4044560194015503, "step": 809, "train_speed(iter/s)": 0.13566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 591.0, "completions/mean_length": 456.6428527832031, "completions/min_length": 335.0, "entropy/max": 0.58984375, "entropy/mean": 0.443359375, "entropy/min": 0.318359375, "epoch": 0.81, "grad_norm": 1.2097038316865059, "kl": 0.29296875, "learning_rate": 1.763234185701673e-07, "loss": 0.0029480045195668936, "memory(GiB)": 137.04, "reward": 1.1652276515960693, "reward_std": 0.1880207657814026, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.05281609669327736, "rewards/EvidenceHallucination/std": 0.19776032865047455, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.6357524394989014, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.12133102118968964, "rewards/VideoAccuracy/std": 0.2569405734539032, "step": 810, "train_speed(iter/s)": 0.135135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 696.0, "completions/mean_length": 489.69049072265625, "completions/min_length": 341.0, "entropy/max": 0.5703125, "entropy/mean": 0.30078125, "entropy/min": 0.1435546875, "epoch": 0.811, "grad_norm": 1.0259340548008493, "kl": 0.234375, "learning_rate": 1.7452810303722598e-07, "loss": 0.0023717356380075216, "memory(GiB)": 137.04, "reward": 2.467923641204834, "reward_std": 0.0862434059381485, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7541826367378235, "rewards/EvidenceHallucination/std": 0.36006125807762146, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.525759220123291, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9047619104385376, "rewards/HonestTime/std": 0.297101765871048, "rewards/VideoAccuracy/mean": 1.1361346244812012, "rewards/VideoAccuracy/std": 0.4355044364929199, "step": 811, "train_speed(iter/s)": 0.134143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 913.0, "completions/mean_length": 519.2619018554688, "completions/min_length": 311.0, "entropy/max": 1.515625, "entropy/mean": 0.515625, "entropy/min": 0.287109375, "epoch": 0.812, "grad_norm": 1.4005353557473488, "kl": 0.255859375, "learning_rate": 1.7274109999712294e-07, "loss": 0.002595985308289528, "memory(GiB)": 137.04, "reward": 1.8176084756851196, "reward_std": 0.33944687247276306, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.556121826171875, "rewards/EvidenceHallucination/std": 0.41618698835372925, "rewards/Evidence_Num_Record/mean": 5.023809432983398, "rewards/Evidence_Num_Record/std": 1.7034841775894165, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6730506420135498, "rewards/VideoAccuracy/std": 0.4331187307834625, "step": 812, "train_speed(iter/s)": 0.133025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 672.0, "completions/mean_length": 459.1190490722656, "completions/min_length": 306.0, "entropy/max": 0.734375, "entropy/mean": 0.439453125, "entropy/min": 0.294921875, "epoch": 0.813, "grad_norm": 1.1449873713008976, "kl": 0.259765625, "learning_rate": 1.7096242744495838e-07, "loss": 0.0026133707724511623, "memory(GiB)": 137.04, "reward": 1.391340970993042, "reward_std": 0.20070883631706238, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.27407148480415344, "rewards/EvidenceHallucination/std": 0.4221936762332916, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 1.1313297748565674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.33652666211128235, "rewards/VideoAccuracy/std": 0.4078528881072998, "step": 813, "train_speed(iter/s)": 0.13225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/mean_length": 492.4761962890625, "completions/min_length": 281.0, "entropy/max": 0.46875, "entropy/mean": 0.341796875, "entropy/min": 0.1455078125, "epoch": 0.814, "grad_norm": 1.1441413979379622, "kl": 0.25390625, "learning_rate": 1.6919210329194534e-07, "loss": 0.0025603468529880047, "memory(GiB)": 137.04, "reward": 2.121767282485962, "reward_std": 0.18712137639522552, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5634064674377441, "rewards/EvidenceHallucination/std": 0.4153032898902893, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.6595144867897034, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8424190282821655, "rewards/VideoAccuracy/std": 0.4346762001514435, "step": 814, "train_speed(iter/s)": 0.131271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 921.0, "completions/mean_length": 591.2142944335938, "completions/min_length": 274.0, "entropy/max": 1.7109375, "entropy/mean": 0.490234375, "entropy/min": 0.1640625, "epoch": 0.815, "grad_norm": 1.1221581338540028, "kl": 0.22265625, "learning_rate": 1.674301453652287e-07, "loss": 0.0022738249972462654, "memory(GiB)": 137.04, "reward": 1.8590971231460571, "reward_std": 0.3513486385345459, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3494276702404022, "rewards/EvidenceHallucination/std": 0.4168657064437866, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 2.0952911376953125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.6892116069793701, "rewards/VideoAccuracy/std": 0.5239997506141663, "step": 815, "train_speed(iter/s)": 0.130005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 826.0, "completions/mean_length": 473.3809509277344, "completions/min_length": 309.0, "entropy/max": 0.8203125, "entropy/mean": 0.4453125, "entropy/min": 0.294921875, "epoch": 0.816, "grad_norm": 1.2570679726908929, "kl": 0.2578125, "learning_rate": 1.6567657140770474e-07, "loss": 0.002602183260023594, "memory(GiB)": 137.04, "reward": 1.8965072631835938, "reward_std": 0.12467285245656967, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6578428149223328, "rewards/EvidenceHallucination/std": 0.3929106295108795, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.9927144646644592, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.7649388313293457, "rewards/VideoAccuracy/std": 0.4106225371360779, "step": 816, "train_speed(iter/s)": 0.129199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/mean_length": 488.5714416503906, "completions/min_length": 293.0, "entropy/max": 0.58984375, "entropy/mean": 0.375, "entropy/min": 0.1494140625, "epoch": 0.817, "grad_norm": 1.2585392025221378, "kl": 0.302734375, "learning_rate": 1.6393139907784403e-07, "loss": 0.0030637290328741074, "memory(GiB)": 137.04, "reward": 1.8628321886062622, "reward_std": 0.21498841047286987, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49281394481658936, "rewards/EvidenceHallucination/std": 0.4585222601890564, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.3216679096221924, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6642693877220154, "rewards/VideoAccuracy/std": 0.5453062057495117, "step": 817, "train_speed(iter/s)": 0.128316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/mean_length": 528.0238037109375, "completions/min_length": 349.0, "entropy/max": 0.9296875, "entropy/mean": 0.30859375, "entropy/min": 0.12890625, "epoch": 0.818, "grad_norm": 0.8079230123575736, "kl": 0.2255859375, "learning_rate": 1.621946459495127e-07, "loss": 0.002292902674525976, "memory(GiB)": 137.04, "reward": 2.204044818878174, "reward_std": 0.053180523216724396, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6027761101722717, "rewards/EvidenceHallucination/std": 0.4402962923049927, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.092950701713562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9501564502716064, "rewards/VideoAccuracy/std": 0.4471309781074524, "step": 818, "train_speed(iter/s)": 0.127354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 827.0, "completions/mean_length": 479.0952453613281, "completions/min_length": 358.0, "entropy/max": 0.8515625, "entropy/mean": 0.44140625, "entropy/min": 0.2578125, "epoch": 0.819, "grad_norm": 1.070203931568812, "kl": 0.267578125, "learning_rate": 1.6046632951179507e-07, "loss": 0.0027072113007307053, "memory(GiB)": 137.04, "reward": 1.77433180809021, "reward_std": 0.03583128750324249, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6151052713394165, "rewards/EvidenceHallucination/std": 0.46708250045776367, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.1526871919631958, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6179774403572083, "rewards/VideoAccuracy/std": 0.4577104449272156, "step": 819, "train_speed(iter/s)": 0.126477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 632.0, "completions/mean_length": 444.0476379394531, "completions/min_length": 229.0, "entropy/max": 0.56640625, "entropy/mean": 0.4296875, "entropy/min": 0.224609375, "epoch": 0.82, "grad_norm": 1.1627853387496583, "kl": 0.271484375, "learning_rate": 1.5874646716881868e-07, "loss": 0.002706526778638363, "memory(GiB)": 137.04, "reward": 1.6316003799438477, "reward_std": 0.21008215844631195, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40796494483947754, "rewards/EvidenceHallucination/std": 0.4385842978954315, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.8207527995109558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5166741013526917, "rewards/VideoAccuracy/std": 0.5705318450927734, "step": 820, "train_speed(iter/s)": 0.125693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/mean_length": 559.547607421875, "completions/min_length": 394.0, "entropy/max": 0.52734375, "entropy/mean": 0.3046875, "entropy/min": 0.140625, "epoch": 0.821, "grad_norm": 1.0176153711647942, "kl": 0.2109375, "learning_rate": 1.5703507623957847e-07, "loss": 0.0021406924352049828, "memory(GiB)": 137.04, "reward": 2.3748960494995117, "reward_std": 0.22574886679649353, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5159128904342651, "rewards/EvidenceHallucination/std": 0.4431808590888977, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9997095465660095, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 1.0764751434326172, "rewards/VideoAccuracy/std": 0.47842711210250854, "step": 821, "train_speed(iter/s)": 0.124855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1140.0, "completions/mean_length": 550.8333740234375, "completions/min_length": 338.0, "entropy/max": 0.69921875, "entropy/mean": 0.4453125, "entropy/min": 0.2431640625, "epoch": 0.822, "grad_norm": 1.0987707198826209, "kl": 0.2431640625, "learning_rate": 1.5533217395776188e-07, "loss": 0.002876720856875181, "memory(GiB)": 137.04, "reward": 1.8405925035476685, "reward_std": 0.22467979788780212, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5890406370162964, "rewards/EvidenceHallucination/std": 0.43359968066215515, "rewards/Evidence_Num_Record/mean": 5.476190567016602, "rewards/Evidence_Num_Record/std": 2.265737295150757, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6894509196281433, "rewards/VideoAccuracy/std": 0.4250394403934479, "step": 822, "train_speed(iter/s)": 0.124024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/mean_length": 456.7857360839844, "completions/min_length": 329.0, "entropy/max": 0.578125, "entropy/mean": 0.44140625, "entropy/min": 0.2734375, "epoch": 0.823, "grad_norm": 1.4579151535626775, "kl": 0.291015625, "learning_rate": 1.536377774715757e-07, "loss": 0.0029377522878348827, "memory(GiB)": 137.04, "reward": 1.6815420389175415, "reward_std": 0.44127708673477173, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5124199986457825, "rewards/EvidenceHallucination/std": 0.4615868031978607, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.0169869661331177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.579058051109314, "rewards/VideoAccuracy/std": 0.4553752541542053, "step": 823, "train_speed(iter/s)": 0.123081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 934.0, "completions/mean_length": 484.0952453613281, "completions/min_length": 354.0, "entropy/max": 0.51171875, "entropy/mean": 0.36328125, "entropy/min": 0.10205078125, "epoch": 0.824, "grad_norm": 1.2073259135000711, "kl": 0.255859375, "learning_rate": 1.5195190384357404e-07, "loss": 0.00258562620729208, "memory(GiB)": 137.04, "reward": 2.0996549129486084, "reward_std": 0.11016413569450378, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5467841029167175, "rewards/EvidenceHallucination/std": 0.44064101576805115, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.7501451969146729, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8236314654350281, "rewards/VideoAccuracy/std": 0.3638036251068115, "step": 824, "train_speed(iter/s)": 0.122403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 877.0, "completions/mean_length": 534.8095092773438, "completions/min_length": 385.0, "entropy/max": 1.609375, "entropy/mean": 0.453125, "entropy/min": 0.1767578125, "epoch": 0.825, "grad_norm": 1.0607292377664361, "kl": 0.2314453125, "learning_rate": 1.5027457005048572e-07, "loss": 0.0023629399947822094, "memory(GiB)": 137.04, "reward": 1.9367122650146484, "reward_std": 0.22675660252571106, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5748409032821655, "rewards/EvidenceHallucination/std": 0.40622395277023315, "rewards/Evidence_Num_Record/mean": 4.476190567016602, "rewards/Evidence_Num_Record/std": 1.7283587455749512, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.7217440009117126, "rewards/VideoAccuracy/std": 0.4476911723613739, "step": 825, "train_speed(iter/s)": 0.121546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 833.0, "completions/mean_length": 466.5476379394531, "completions/min_length": 355.0, "entropy/max": 0.828125, "entropy/mean": 0.4453125, "entropy/min": 0.25390625, "epoch": 0.826, "grad_norm": 1.2128384339632001, "kl": 0.26171875, "learning_rate": 1.486057929830431e-07, "loss": 0.002626287518069148, "memory(GiB)": 137.04, "reward": 1.6029353141784668, "reward_std": 0.28161782026290894, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4569338858127594, "rewards/EvidenceHallucination/std": 0.4684075713157654, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.6957966089248657, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5115485787391663, "rewards/VideoAccuracy/std": 0.4963649809360504, "step": 826, "train_speed(iter/s)": 0.120613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 650.0, "completions/mean_length": 449.8095397949219, "completions/min_length": 309.0, "entropy/max": 0.8359375, "entropy/mean": 0.466796875, "entropy/min": 0.302734375, "epoch": 0.827, "grad_norm": 1.2506186078462593, "kl": 0.287109375, "learning_rate": 1.469455894458129e-07, "loss": 0.002896510064601898, "memory(GiB)": 137.04, "reward": 1.549792766571045, "reward_std": 0.17999663949012756, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2444106489419937, "rewards/EvidenceHallucination/std": 0.39539089798927307, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 0.9770894646644592, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.40091055631637573, "rewards/VideoAccuracy/std": 0.5077887773513794, "step": 827, "train_speed(iter/s)": 0.120254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 823.0, "completions/mean_length": 586.357177734375, "completions/min_length": 373.0, "entropy/max": 0.890625, "entropy/mean": 0.30078125, "entropy/min": 0.1455078125, "epoch": 0.828, "grad_norm": 1.0323021294234873, "kl": 0.203125, "learning_rate": 1.4529397615702654e-07, "loss": 0.002064808737486601, "memory(GiB)": 137.04, "reward": 2.132803201675415, "reward_std": 0.2657012939453125, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6109729409217834, "rewards/EvidenceHallucination/std": 0.4012887179851532, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.916046142578125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8772752285003662, "rewards/VideoAccuracy/std": 0.4796368479728699, "step": 828, "train_speed(iter/s)": 0.118909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 819.0, "completions/mean_length": 471.8095397949219, "completions/min_length": 284.0, "entropy/max": 0.7734375, "entropy/mean": 0.4921875, "entropy/min": 0.3515625, "epoch": 0.829, "grad_norm": 1.2255006294837825, "kl": 0.26953125, "learning_rate": 1.4365096974841106e-07, "loss": 0.002734632696956396, "memory(GiB)": 137.04, "reward": 1.8650703430175781, "reward_std": 0.21632973849773407, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6693183183670044, "rewards/EvidenceHallucination/std": 0.41198858618736267, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 1.5469801425933838, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.7026352882385254, "rewards/VideoAccuracy/std": 0.4336860179901123, "step": 829, "train_speed(iter/s)": 0.118299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 655.0, "completions/mean_length": 462.8571472167969, "completions/min_length": 286.0, "entropy/max": 0.8359375, "entropy/mean": 0.421875, "entropy/min": 0.2578125, "epoch": 0.83, "grad_norm": 1.2870548016908077, "kl": 0.283203125, "learning_rate": 1.4201658676502293e-07, "loss": 0.0028648152947425842, "memory(GiB)": 137.04, "reward": 1.3752903938293457, "reward_std": 0.33857226371765137, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21359898149967194, "rewards/EvidenceHallucination/std": 0.38932594656944275, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.9122345447540283, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.2992372214794159, "rewards/VideoAccuracy/std": 0.3674658536911011, "step": 830, "train_speed(iter/s)": 0.11768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/mean_length": 542.357177734375, "completions/min_length": 388.0, "entropy/max": 0.5, "entropy/mean": 0.28515625, "entropy/min": 0.150390625, "epoch": 0.831, "grad_norm": 0.9540566926721836, "kl": 0.2197265625, "learning_rate": 1.4039084366508092e-07, "loss": 0.0022365141194313765, "memory(GiB)": 137.04, "reward": 2.3009817600250244, "reward_std": 0.11945509910583496, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6476714611053467, "rewards/EvidenceHallucination/std": 0.42851969599723816, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.7904775738716125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.9762094020843506, "rewards/VideoAccuracy/std": 0.3568113446235657, "step": 831, "train_speed(iter/s)": 0.11708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 901.0, "completions/mean_length": 488.5238037109375, "completions/min_length": 262.0, "entropy/max": 0.8515625, "entropy/mean": 0.482421875, "entropy/min": 0.232421875, "epoch": 0.832, "grad_norm": 0.9672332619213181, "kl": 0.275390625, "learning_rate": 1.3877375681979942e-07, "loss": 0.002822866663336754, "memory(GiB)": 137.04, "reward": 1.3189421892166138, "reward_std": 0.1224055141210556, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22665101289749146, "rewards/EvidenceHallucination/std": 0.3877301812171936, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.5002902746200562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777005434036255, "rewards/VideoAccuracy/mean": 0.249802365899086, "rewards/VideoAccuracy/std": 0.40132516622543335, "step": 832, "train_speed(iter/s)": 0.115738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 560.0, "completions/mean_length": 428.7857360839844, "completions/min_length": 232.0, "entropy/max": 0.76171875, "entropy/mean": 0.423828125, "entropy/min": 0.265625, "epoch": 0.833, "grad_norm": 1.2916113779099452, "kl": 0.294921875, "learning_rate": 1.3716534251322544e-07, "loss": 0.0029611135832965374, "memory(GiB)": 137.36, "reward": 1.519522786140442, "reward_std": 0.27905896306037903, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35443753004074097, "rewards/EvidenceHallucination/std": 0.4601483643054962, "rewards/Evidence_Num_Record/mean": 3.809523820877075, "rewards/Evidence_Num_Record/std": 0.6339229345321655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.41530200839042664, "rewards/VideoAccuracy/std": 0.44496962428092957, "step": 833, "train_speed(iter/s)": 0.11507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 627.0, "completions/mean_length": 469.66668701171875, "completions/min_length": 345.0, "entropy/max": 0.81640625, "entropy/mean": 0.3828125, "entropy/min": 0.1708984375, "epoch": 0.834, "grad_norm": 1.314431795447688, "kl": 0.259765625, "learning_rate": 1.3556561694207335e-07, "loss": 0.0026252754032611847, "memory(GiB)": 137.36, "reward": 2.1789443492889404, "reward_std": 0.21588024497032166, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.666461706161499, "rewards/EvidenceHallucination/std": 0.431160569190979, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7213357090950012, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8789854049682617, "rewards/VideoAccuracy/std": 0.44793781638145447, "step": 834, "train_speed(iter/s)": 0.114428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1016.0, "completions/mean_length": 519.5952758789062, "completions/min_length": 277.0, "entropy/max": 1.1015625, "entropy/mean": 0.4375, "entropy/min": 0.12451171875, "epoch": 0.835, "grad_norm": 1.2486714801507632, "kl": 0.2470703125, "learning_rate": 1.3397459621556128e-07, "loss": 0.0025551673024892807, "memory(GiB)": 137.36, "reward": 1.9654521942138672, "reward_std": 0.1467912495136261, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5538205504417419, "rewards/EvidenceHallucination/std": 0.4164559543132782, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 2.4175124168395996, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.7594498991966248, "rewards/VideoAccuracy/std": 0.5330292582511902, "step": 835, "train_speed(iter/s)": 0.113663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 696.0, "completions/mean_length": 505.5476379394531, "completions/min_length": 360.0, "entropy/max": 0.60546875, "entropy/mean": 0.4140625, "entropy/min": 0.2890625, "epoch": 0.836, "grad_norm": 1.3787251891598633, "kl": 0.263671875, "learning_rate": 1.3239229635525073e-07, "loss": 0.0026535876095294952, "memory(GiB)": 137.36, "reward": 1.7774924039840698, "reward_std": 0.31256455183029175, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5785791873931885, "rewards/EvidenceHallucination/std": 0.46408405900001526, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 0.8742011189460754, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.661776602268219, "rewards/VideoAccuracy/std": 0.43136221170425415, "step": 836, "train_speed(iter/s)": 0.112896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 753.0, "completions/mean_length": 440.5476379394531, "completions/min_length": 328.0, "entropy/max": 0.578125, "entropy/mean": 0.40625, "entropy/min": 0.28125, "epoch": 0.837, "grad_norm": 1.327097986710748, "kl": 0.283203125, "learning_rate": 1.3081873329488392e-07, "loss": 0.002853620797395706, "memory(GiB)": 137.36, "reward": 2.0167486667633057, "reward_std": 0.39193081855773926, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6177124977111816, "rewards/EvidenceHallucination/std": 0.4071316123008728, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.9122345447540283, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.7932060956954956, "rewards/VideoAccuracy/std": 0.4522143006324768, "step": 837, "train_speed(iter/s)": 0.11227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 716.0, "completions/mean_length": 502.76190185546875, "completions/min_length": 352.0, "entropy/max": 0.83984375, "entropy/mean": 0.33203125, "entropy/min": 0.08447265625, "epoch": 0.838, "grad_norm": 1.1545769651299362, "kl": 0.2373046875, "learning_rate": 1.2925392288022296e-07, "loss": 0.0024269253481179476, "memory(GiB)": 137.36, "reward": 2.115060329437256, "reward_std": 0.2253529280424118, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6825776100158691, "rewards/EvidenceHallucination/std": 0.3816542327404022, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.0777013301849365, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8452115654945374, "rewards/VideoAccuracy/std": 0.4711548089981079, "step": 838, "train_speed(iter/s)": 0.11117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1786.0, "completions/mean_length": 523.6190795898438, "completions/min_length": 269.0, "entropy/max": 0.9765625, "entropy/mean": 0.435546875, "entropy/min": 0.2060546875, "epoch": 0.839, "grad_norm": 1.0796540588414187, "kl": 0.271484375, "learning_rate": 1.2769788086889132e-07, "loss": 0.0028046760708093643, "memory(GiB)": 137.36, "reward": 1.6544684171676636, "reward_std": 0.13489244878292084, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.49136263132095337, "rewards/EvidenceHallucination/std": 0.43816834688186646, "rewards/Evidence_Num_Record/mean": 5.142857074737549, "rewards/Evidence_Num_Record/std": 3.6597445011138916, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5228625535964966, "rewards/VideoAccuracy/std": 0.47375646233558655, "step": 839, "train_speed(iter/s)": 0.110558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 607.0, "completions/mean_length": 402.16668701171875, "completions/min_length": 241.0, "entropy/max": 0.76953125, "entropy/mean": 0.416015625, "entropy/min": 0.271484375, "epoch": 0.84, "grad_norm": 1.3338339500336278, "kl": 0.291015625, "learning_rate": 1.2615062293021506e-07, "loss": 0.0029495495837181807, "memory(GiB)": 137.36, "reward": 1.7845600843429565, "reward_std": 0.11220282316207886, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.528125524520874, "rewards/EvidenceHallucination/std": 0.44493892788887024, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.8025076985359192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6456018090248108, "rewards/VideoAccuracy/std": 0.5069880485534668, "step": 840, "train_speed(iter/s)": 0.109998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 931.0, "completions/mean_length": 549.1190795898438, "completions/min_length": 354.0, "entropy/max": 0.490234375, "entropy/mean": 0.275390625, "entropy/min": 0.1259765625, "epoch": 0.841, "grad_norm": 1.0705962781580627, "kl": 0.2158203125, "learning_rate": 1.2461216464506452e-07, "loss": 0.002181172836571932, "memory(GiB)": 137.36, "reward": 2.2544729709625244, "reward_std": 0.1595495343208313, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5502666234970093, "rewards/EvidenceHallucination/std": 0.427486389875412, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7213357090950012, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9444196224212646, "rewards/VideoAccuracy/std": 0.4263104200363159, "step": 841, "train_speed(iter/s)": 0.109322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 805.0, "completions/mean_length": 478.19049072265625, "completions/min_length": 316.0, "entropy/max": 1.1875, "entropy/mean": 0.5, "entropy/min": 0.302734375, "epoch": 0.842, "grad_norm": 1.3309944067739075, "kl": 0.275390625, "learning_rate": 1.230825215056971e-07, "loss": 0.0028127585537731647, "memory(GiB)": 137.36, "reward": 1.7678550481796265, "reward_std": 0.2609485387802124, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5329591035842896, "rewards/EvidenceHallucination/std": 0.45397794246673584, "rewards/Evidence_Num_Record/mean": 4.523809432983398, "rewards/Evidence_Num_Record/std": 1.3477731943130493, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.599358320236206, "rewards/VideoAccuracy/std": 0.42538169026374817, "step": 842, "train_speed(iter/s)": 0.108668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 725.0, "completions/mean_length": 440.73809814453125, "completions/min_length": 347.0, "entropy/max": 0.5546875, "entropy/mean": 0.412109375, "entropy/min": 0.2333984375, "epoch": 0.843, "grad_norm": 1.3374249909047569, "kl": 0.283203125, "learning_rate": 1.2156170891560258e-07, "loss": 0.002839302644133568, "memory(GiB)": 137.36, "reward": 1.734251856803894, "reward_std": 0.4089145064353943, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5525057911872864, "rewards/EvidenceHallucination/std": 0.4349856674671173, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 1.2650946378707886, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6237506866455078, "rewards/VideoAccuracy/std": 0.4713672697544098, "step": 843, "train_speed(iter/s)": 0.10802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 906.0, "completions/mean_length": 569.7142944335938, "completions/min_length": 368.0, "entropy/max": 0.6953125, "entropy/mean": 0.330078125, "entropy/min": 0.1396484375, "epoch": 0.844, "grad_norm": 1.1355950308818543, "kl": 0.2314453125, "learning_rate": 1.2004974218934695e-07, "loss": 0.0023405367974191904, "memory(GiB)": 137.36, "reward": 1.9341176748275757, "reward_std": 0.10934612154960632, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3932000696659088, "rewards/EvidenceHallucination/std": 0.4373791217803955, "rewards/Evidence_Num_Record/mean": 4.547619342803955, "rewards/Evidence_Num_Record/std": 1.253334641456604, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6888108253479004, "rewards/VideoAccuracy/std": 0.4891456365585327, "step": 844, "train_speed(iter/s)": 0.107671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 824.0, "completions/mean_length": 487.7857360839844, "completions/min_length": 244.0, "entropy/max": 0.8984375, "entropy/mean": 0.384765625, "entropy/min": 0.140625, "epoch": 0.845, "grad_norm": 0.9592076721527633, "kl": 0.25390625, "learning_rate": 1.1854663655241804e-07, "loss": 0.0025939876213669777, "memory(GiB)": 137.36, "reward": 1.7634539604187012, "reward_std": 0.1565001904964447, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3028511703014374, "rewards/EvidenceHallucination/std": 0.4015829861164093, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.1974129676818848, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.6076455116271973, "rewards/VideoAccuracy/std": 0.5249733328819275, "step": 845, "train_speed(iter/s)": 0.107127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 884.0, "completions/mean_length": 471.3333435058594, "completions/min_length": 274.0, "entropy/max": 0.58984375, "entropy/mean": 0.44140625, "entropy/min": 0.279296875, "epoch": 0.846, "grad_norm": 1.2116800723488819, "kl": 0.26953125, "learning_rate": 1.1705240714107301e-07, "loss": 0.0027307234704494476, "memory(GiB)": 137.36, "reward": 1.3947242498397827, "reward_std": 0.18065707385540009, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.23713630437850952, "rewards/EvidenceHallucination/std": 0.41608914732933044, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.9422956109046936, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.2901540696620941, "rewards/VideoAccuracy/std": 0.3921188414096832, "step": 846, "train_speed(iter/s)": 0.10601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 644.0, "completions/mean_length": 454.5476379394531, "completions/min_length": 320.0, "entropy/max": 0.625, "entropy/mean": 0.41796875, "entropy/min": 0.2099609375, "epoch": 0.847, "grad_norm": 1.215591605280868, "kl": 0.283203125, "learning_rate": 1.1556706900218572e-07, "loss": 0.002849389798939228, "memory(GiB)": 137.36, "reward": 2.086452007293701, "reward_std": 0.2277895212173462, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6672782301902771, "rewards/EvidenceHallucination/std": 0.43294113874435425, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.1435829401016235, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.862520158290863, "rewards/VideoAccuracy/std": 0.5838239789009094, "step": 847, "train_speed(iter/s)": 0.10549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1009.0, "completions/mean_length": 611.0714111328125, "completions/min_length": 381.0, "entropy/max": 0.6796875, "entropy/mean": 0.306640625, "entropy/min": 0.166015625, "epoch": 0.848, "grad_norm": 0.6460856343573428, "kl": 0.2099609375, "learning_rate": 1.140906370930944e-07, "loss": 0.0021102060563862324, "memory(GiB)": 137.36, "reward": 1.7929471731185913, "reward_std": 0.03765248879790306, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4066600501537323, "rewards/EvidenceHallucination/std": 0.4503607451915741, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.2570359706878662, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5782817602157593, "rewards/VideoAccuracy/std": 0.5222153067588806, "step": 848, "train_speed(iter/s)": 0.104894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 692.0, "completions/mean_length": 470.2857360839844, "completions/min_length": 333.0, "entropy/max": 1.671875, "entropy/mean": 0.470703125, "entropy/min": 0.2421875, "epoch": 0.849, "grad_norm": 1.1901820052487082, "kl": 0.267578125, "learning_rate": 1.1262312628145209e-07, "loss": 0.002717760857194662, "memory(GiB)": 137.36, "reward": 1.6252732276916504, "reward_std": 0.2671740651130676, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4483039081096649, "rewards/EvidenceHallucination/std": 0.47909459471702576, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.188407063484192, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.46894586086273193, "rewards/VideoAccuracy/std": 0.4558504819869995, "step": 849, "train_speed(iter/s)": 0.104379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 691.0, "completions/mean_length": 454.3809509277344, "completions/min_length": 299.0, "entropy/max": 0.65234375, "entropy/mean": 0.42578125, "entropy/min": 0.2890625, "epoch": 0.85, "grad_norm": 1.2423254127926724, "kl": 0.267578125, "learning_rate": 1.1116455134507663e-07, "loss": 0.002680886536836624, "memory(GiB)": 137.36, "reward": 1.6951377391815186, "reward_std": 0.19957676529884338, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4258413016796112, "rewards/EvidenceHallucination/std": 0.45843377709388733, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.8530195355415344, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5480645895004272, "rewards/VideoAccuracy/std": 0.5059059262275696, "step": 850, "train_speed(iter/s)": 0.103933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 753.0, "completions/mean_length": 511.4761962890625, "completions/min_length": 355.0, "entropy/max": 0.69921875, "entropy/mean": 0.3203125, "entropy/min": 0.107421875, "epoch": 0.851, "grad_norm": 1.0218613332915403, "kl": 0.2236328125, "learning_rate": 1.0971492697180096e-07, "loss": 0.002279686275869608, "memory(GiB)": 137.36, "reward": 2.298257350921631, "reward_std": 0.2220420241355896, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6890178918838501, "rewards/EvidenceHallucination/std": 0.37429994344711304, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.9830147624015808, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.969977617263794, "rewards/VideoAccuracy/std": 0.38433244824409485, "step": 851, "train_speed(iter/s)": 0.103381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 874.0, "completions/mean_length": 491.3333435058594, "completions/min_length": 245.0, "entropy/max": 1.2109375, "entropy/mean": 0.47265625, "entropy/min": 0.19921875, "epoch": 0.852, "grad_norm": 1.2509115609704713, "kl": 0.275390625, "learning_rate": 1.0827426775932657e-07, "loss": 0.0027865557931363583, "memory(GiB)": 137.36, "reward": 1.699042797088623, "reward_std": 0.28074491024017334, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5076565742492676, "rewards/EvidenceHallucination/std": 0.47236886620521545, "rewards/Evidence_Num_Record/mean": 4.761904716491699, "rewards/Evidence_Num_Record/std": 1.2259297370910645, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5356067419052124, "rewards/VideoAccuracy/std": 0.46379873156547546, "step": 852, "train_speed(iter/s)": 0.102783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 602.0, "completions/mean_length": 429.5, "completions/min_length": 307.0, "entropy/max": 0.89453125, "entropy/mean": 0.48046875, "entropy/min": 0.283203125, "epoch": 0.853, "grad_norm": 1.368874538721325, "kl": 0.275390625, "learning_rate": 1.0684258821507618e-07, "loss": 0.0027739896904677153, "memory(GiB)": 137.36, "reward": 1.7046847343444824, "reward_std": 0.44494467973709106, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5178847908973694, "rewards/EvidenceHallucination/std": 0.42951691150665283, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.5961549282073975, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6011077165603638, "rewards/VideoAccuracy/std": 0.4659145176410675, "step": 853, "train_speed(iter/s)": 0.102279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 690.0, "completions/mean_length": 457.5476379394531, "completions/min_length": 333.0, "entropy/max": 0.5546875, "entropy/mean": 0.330078125, "entropy/min": 0.1416015625, "epoch": 0.854, "grad_norm": 1.151241175784308, "kl": 0.271484375, "learning_rate": 1.0541990275604628e-07, "loss": 0.0027510307263582945, "memory(GiB)": 137.36, "reward": 2.28006911277771, "reward_std": 0.09352787584066391, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7334737181663513, "rewards/EvidenceHallucination/std": 0.3510028123855591, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.9833102226257324, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9667075276374817, "rewards/VideoAccuracy/std": 0.4173412024974823, "step": 854, "train_speed(iter/s)": 0.101843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/mean_length": 488.90478515625, "completions/min_length": 327.0, "entropy/max": 1.46875, "entropy/mean": 0.396484375, "entropy/min": 0.1494140625, "epoch": 0.855, "grad_norm": 1.136655172700135, "kl": 0.25390625, "learning_rate": 1.0400622570866425e-07, "loss": 0.0026231100782752037, "memory(GiB)": 137.36, "reward": 1.914324164390564, "reward_std": 0.13127605617046356, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5789222121238708, "rewards/EvidenceHallucination/std": 0.427633672952652, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.7598239183425903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.6985397934913635, "rewards/VideoAccuracy/std": 0.4905802309513092, "step": 855, "train_speed(iter/s)": 0.101345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 714.0, "completions/mean_length": 488.5238037109375, "completions/min_length": 343.0, "entropy/max": 0.72265625, "entropy/mean": 0.419921875, "entropy/min": 0.265625, "epoch": 0.856, "grad_norm": 1.2209167269679986, "kl": 0.26953125, "learning_rate": 1.0260157130864177e-07, "loss": 0.0027050748467445374, "memory(GiB)": 137.36, "reward": 1.7837170362472534, "reward_std": 0.2790336608886719, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.566267192363739, "rewards/EvidenceHallucination/std": 0.4373977482318878, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.2061110734939575, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.6418920159339905, "rewards/VideoAccuracy/std": 0.429427832365036, "step": 856, "train_speed(iter/s)": 0.100324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 612.0, "completions/mean_length": 461.66668701171875, "completions/min_length": 321.0, "entropy/max": 0.55078125, "entropy/mean": 0.400390625, "entropy/min": 0.267578125, "epoch": 0.857, "grad_norm": 1.1477013837218386, "kl": 0.28125, "learning_rate": 1.0120595370083318e-07, "loss": 0.0028410381637513638, "memory(GiB)": 137.36, "reward": 1.9754301309585571, "reward_std": 0.0912606343626976, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6005602478981018, "rewards/EvidenceHallucination/std": 0.45697399973869324, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 0.8781778216362, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.7553179860115051, "rewards/VideoAccuracy/std": 0.5490778684616089, "step": 857, "train_speed(iter/s)": 0.099872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 842.0, "completions/mean_length": 579.0714111328125, "completions/min_length": 369.0, "entropy/max": 0.734375, "entropy/mean": 0.31640625, "entropy/min": 0.1357421875, "epoch": 0.858, "grad_norm": 0.6323285661858963, "kl": 0.21875, "learning_rate": 9.981938693909219e-08, "loss": 0.002220054157078266, "memory(GiB)": 137.36, "reward": 1.7847347259521484, "reward_std": 0.15355658531188965, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.34096771478652954, "rewards/EvidenceHallucination/std": 0.44314488768577576, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 0.9258201122283936, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5832078456878662, "rewards/VideoAccuracy/std": 0.5091155171394348, "step": 858, "train_speed(iter/s)": 0.099318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 817.0, "completions/mean_length": 493.8095397949219, "completions/min_length": 312.0, "entropy/max": 1.015625, "entropy/mean": 0.4921875, "entropy/min": 0.314453125, "epoch": 0.859, "grad_norm": 1.250741159341449, "kl": 0.265625, "learning_rate": 9.844188498613115e-08, "loss": 0.0026930745225399733, "memory(GiB)": 137.36, "reward": 1.8230197429656982, "reward_std": 0.07269556820392609, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5855136513710022, "rewards/EvidenceHallucination/std": 0.45245108008384705, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.2287685871124268, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500064849853516, "rewards/VideoAccuracy/mean": 0.6535361409187317, "rewards/VideoAccuracy/std": 0.39689022302627563, "step": 859, "train_speed(iter/s)": 0.098873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 644.0, "completions/mean_length": 456.0476379394531, "completions/min_length": 334.0, "entropy/max": 0.49609375, "entropy/mean": 0.40234375, "entropy/min": 0.294921875, "epoch": 0.86, "grad_norm": 1.0915671993006, "kl": 0.283203125, "learning_rate": 9.707346171337893e-08, "loss": 0.0028428146615624428, "memory(GiB)": 137.36, "reward": 1.299574851989746, "reward_std": 0.2713920772075653, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.20511110126972198, "rewards/EvidenceHallucination/std": 0.3983006775379181, "rewards/Evidence_Num_Record/mean": 3.809523820877075, "rewards/Evidence_Num_Record/std": 0.7066960334777832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.22521927952766418, "rewards/VideoAccuracy/std": 0.4099663496017456, "step": 860, "train_speed(iter/s)": 0.098626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 873.0, "completions/mean_length": 544.5, "completions/min_length": 354.0, "entropy/max": 0.7578125, "entropy/mean": 0.29296875, "entropy/min": 0.11572265625, "epoch": 0.861, "grad_norm": 1.0238063997560405, "kl": 0.2197265625, "learning_rate": 9.57141309008428e-08, "loss": 0.0022320393472909927, "memory(GiB)": 137.36, "reward": 2.289512872695923, "reward_std": 0.2071124017238617, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6585298180580139, "rewards/EvidenceHallucination/std": 0.41074296832084656, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.6115421056747437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430335700511932, "rewards/VideoAccuracy/mean": 0.9625687599182129, "rewards/VideoAccuracy/std": 0.27861785888671875, "step": 861, "train_speed(iter/s)": 0.098147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 818.0, "completions/mean_length": 493.1428527832031, "completions/min_length": 363.0, "entropy/max": 1.09375, "entropy/mean": 0.494140625, "entropy/min": 0.2890625, "epoch": 0.862, "grad_norm": 1.360844426339108, "kl": 0.271484375, "learning_rate": 9.43639062369691e-08, "loss": 0.0027274617459625006, "memory(GiB)": 137.36, "reward": 1.956334114074707, "reward_std": 0.2271970808506012, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7514772415161133, "rewards/EvidenceHallucination/std": 0.3589233458042145, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.0581248998641968, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.261904776096344, "rewards/HonestTime/std": 0.44500061869621277, "rewards/VideoAccuracy/mean": 0.7536576986312866, "rewards/VideoAccuracy/std": 0.3471892178058624, "step": 862, "train_speed(iter/s)": 0.0977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 556.0, "completions/mean_length": 417.5714416503906, "completions/min_length": 311.0, "entropy/max": 0.609375, "entropy/mean": 0.419921875, "entropy/min": 0.25390625, "epoch": 0.863, "grad_norm": 1.34293589689136, "kl": 0.298828125, "learning_rate": 9.302280131850537e-08, "loss": 0.0029917543288320303, "memory(GiB)": 137.36, "reward": 1.5938074588775635, "reward_std": 0.3932831287384033, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4868219494819641, "rewards/EvidenceHallucination/std": 0.4731104373931885, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.7066960334777832, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.496442973613739, "rewards/VideoAccuracy/std": 0.48178577423095703, "step": 863, "train_speed(iter/s)": 0.097267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 675.0, "completions/mean_length": 466.2857360839844, "completions/min_length": 337.0, "entropy/max": 0.5859375, "entropy/mean": 0.35546875, "entropy/min": 0.2197265625, "epoch": 0.864, "grad_norm": 1.3350307015516478, "kl": 0.26953125, "learning_rate": 9.169082965036279e-08, "loss": 0.002724633552134037, "memory(GiB)": 137.36, "reward": 1.9755473136901855, "reward_std": 0.145391583442688, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.46336421370506287, "rewards/EvidenceHallucination/std": 0.44457826018333435, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.9865530729293823, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7162078022956848, "rewards/VideoAccuracy/std": 0.46055346727371216, "step": 864, "train_speed(iter/s)": 0.096809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1012.0, "completions/mean_length": 562.6428833007812, "completions/min_length": 296.0, "entropy/max": 1.7890625, "entropy/mean": 0.458984375, "entropy/min": 0.1474609375, "epoch": 0.865, "grad_norm": 1.0681975181453227, "kl": 0.2265625, "learning_rate": 9.036800464548156e-08, "loss": 0.0022891086991876364, "memory(GiB)": 137.36, "reward": 1.952393651008606, "reward_std": 0.16332533955574036, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5214911103248596, "rewards/EvidenceHallucination/std": 0.40741944313049316, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.2869396209716797, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.7480955719947815, "rewards/VideoAccuracy/std": 0.4404449760913849, "step": 865, "train_speed(iter/s)": 0.096226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 667.0, "completions/mean_length": 486.3809509277344, "completions/min_length": 357.0, "entropy/max": 0.83984375, "entropy/mean": 0.453125, "entropy/min": 0.318359375, "epoch": 0.866, "grad_norm": 1.2752987021411784, "kl": 0.267578125, "learning_rate": 8.905433962469488e-08, "loss": 0.002698513213545084, "memory(GiB)": 137.36, "reward": 1.9704946279525757, "reward_std": 0.11606656759977341, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7517710328102112, "rewards/EvidenceHallucination/std": 0.3492864966392517, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 0.9323829412460327, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.786807119846344, "rewards/VideoAccuracy/std": 0.3002590239048004, "step": 866, "train_speed(iter/s)": 0.095796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 635.0, "completions/mean_length": 447.0238037109375, "completions/min_length": 265.0, "entropy/max": 0.60546875, "entropy/mean": 0.41015625, "entropy/min": 0.296875, "epoch": 0.867, "grad_norm": 1.274235978961333, "kl": 0.27734375, "learning_rate": 8.774984781659468e-08, "loss": 0.002792106010019779, "memory(GiB)": 137.36, "reward": 1.7536741495132446, "reward_std": 0.22508135437965393, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3733477294445038, "rewards/EvidenceHallucination/std": 0.42099037766456604, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.9055256247520447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.5837664008140564, "rewards/VideoAccuracy/std": 0.5010433197021484, "step": 867, "train_speed(iter/s)": 0.095443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1025.0, "completions/mean_length": 591.7857055664062, "completions/min_length": 335.0, "entropy/max": 0.5625, "entropy/mean": 0.318359375, "entropy/min": 0.13671875, "epoch": 0.868, "grad_norm": 0.6722452828309774, "kl": 0.2080078125, "learning_rate": 8.645454235739902e-08, "loss": 0.002102708211168647, "memory(GiB)": 137.41, "reward": 1.9160841703414917, "reward_std": 0.08268817514181137, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5511731505393982, "rewards/EvidenceHallucination/std": 0.42706412076950073, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.1323559284210205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6725161075592041, "rewards/VideoAccuracy/std": 0.5182072520256042, "step": 868, "train_speed(iter/s)": 0.094723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 896.0, "completions/mean_length": 500.8571472167969, "completions/min_length": 334.0, "entropy/max": 1.3359375, "entropy/mean": 0.49609375, "entropy/min": 0.287109375, "epoch": 0.869, "grad_norm": 0.789281956188977, "kl": 0.275390625, "learning_rate": 8.516843629081982e-08, "loss": 0.002797728404402733, "memory(GiB)": 137.41, "reward": 1.5103896856307983, "reward_std": 0.07082901895046234, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3638714551925659, "rewards/EvidenceHallucination/std": 0.451041042804718, "rewards/Evidence_Num_Record/mean": 4.761904716491699, "rewards/Evidence_Num_Record/std": 1.7503941059112549, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.3709486722946167, "rewards/VideoAccuracy/std": 0.40454044938087463, "step": 869, "train_speed(iter/s)": 0.094391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/mean_length": 445.73809814453125, "completions/min_length": 307.0, "entropy/max": 0.59765625, "entropy/mean": 0.42578125, "entropy/min": 0.30078125, "epoch": 0.87, "grad_norm": 1.1826294572355491, "kl": 0.29296875, "learning_rate": 8.38915425679304e-08, "loss": 0.0029431432485580444, "memory(GiB)": 137.41, "reward": 1.4018317461013794, "reward_std": 0.2824872136116028, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3030472993850708, "rewards/EvidenceHallucination/std": 0.4373262822628021, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.8742011785507202, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3078889548778534, "rewards/VideoAccuracy/std": 0.38509246706962585, "step": 870, "train_speed(iter/s)": 0.094011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 792.0, "completions/mean_length": 540.357177734375, "completions/min_length": 365.0, "entropy/max": 0.47265625, "entropy/mean": 0.27734375, "entropy/min": 0.13671875, "epoch": 0.871, "grad_norm": 1.0241036917869137, "kl": 0.2216796875, "learning_rate": 8.262387404703653e-08, "loss": 0.002246022457256913, "memory(GiB)": 137.41, "reward": 2.128763437271118, "reward_std": 0.10026715695858002, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5795870423316956, "rewards/EvidenceHallucination/std": 0.4270126521587372, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.0017406940460205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8176080584526062, "rewards/VideoAccuracy/std": 0.3870820999145508, "step": 871, "train_speed(iter/s)": 0.093529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 893.0, "completions/mean_length": 547.6428833007812, "completions/min_length": 356.0, "entropy/max": 0.97265625, "entropy/mean": 0.484375, "entropy/min": 0.240234375, "epoch": 0.872, "grad_norm": 1.2728577199743507, "kl": 0.2431640625, "learning_rate": 8.136544349354668e-08, "loss": 0.0024901535362005234, "memory(GiB)": 137.41, "reward": 1.6112569570541382, "reward_std": 0.3500445783138275, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4395892918109894, "rewards/EvidenceHallucination/std": 0.4364795684814453, "rewards/Evidence_Num_Record/mean": 5.0, "rewards/Evidence_Num_Record/std": 1.6527878046035767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722997188568115, "rewards/VideoAccuracy/mean": 0.4661962389945984, "rewards/VideoAccuracy/std": 0.43624967336654663, "step": 872, "train_speed(iter/s)": 0.093007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 593.0, "completions/mean_length": 406.73809814453125, "completions/min_length": 250.0, "entropy/max": 0.734375, "entropy/mean": 0.484375, "entropy/min": 0.294921875, "epoch": 0.873, "grad_norm": 1.1136326946388309, "kl": 0.291015625, "learning_rate": 8.01162635798418e-08, "loss": 0.00293728057295084, "memory(GiB)": 137.41, "reward": 1.378351092338562, "reward_std": 0.18517830967903137, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.28729623556137085, "rewards/EvidenceHallucination/std": 0.4380473494529724, "rewards/Evidence_Num_Record/mean": 3.3809523582458496, "rewards/Evidence_Num_Record/std": 0.8540400862693787, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.3208918273448944, "rewards/VideoAccuracy/std": 0.43048128485679626, "step": 873, "train_speed(iter/s)": 0.092613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1011.0, "completions/mean_length": 487.452392578125, "completions/min_length": 254.0, "entropy/max": 0.640625, "entropy/mean": 0.3671875, "entropy/min": 0.1533203125, "epoch": 0.874, "grad_norm": 1.138857625156919, "kl": 0.263671875, "learning_rate": 7.887634688515e-08, "loss": 0.0026560970582067966, "memory(GiB)": 137.41, "reward": 1.9813612699508667, "reward_std": 0.15701758861541748, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4765368103981018, "rewards/EvidenceHallucination/std": 0.48360008001327515, "rewards/Evidence_Num_Record/mean": 3.452381134033203, "rewards/Evidence_Num_Record/std": 0.66999751329422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.7241489887237549, "rewards/VideoAccuracy/std": 0.41951704025268555, "step": 874, "train_speed(iter/s)": 0.092173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1131.0, "completions/mean_length": 573.0238037109375, "completions/min_length": 268.0, "entropy/max": 0.73046875, "entropy/mean": 0.376953125, "entropy/min": 0.1328125, "epoch": 0.875, "grad_norm": 1.2268666216419313, "kl": 0.203125, "learning_rate": 7.764570589541875e-08, "loss": 0.0021050162613391876, "memory(GiB)": 137.41, "reward": 1.972877860069275, "reward_std": 0.15945619344711304, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6702647805213928, "rewards/EvidenceHallucination/std": 0.4148913025856018, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.555030107498169, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.7388249635696411, "rewards/VideoAccuracy/std": 0.34591543674468994, "step": 875, "train_speed(iter/s)": 0.091599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 805.0, "completions/mean_length": 473.3809509277344, "completions/min_length": 363.0, "entropy/max": 0.71484375, "entropy/mean": 0.435546875, "entropy/min": 0.232421875, "epoch": 0.876, "grad_norm": 1.2740935503447899, "kl": 0.279296875, "learning_rate": 7.642435300318906e-08, "loss": 0.002823675749823451, "memory(GiB)": 137.41, "reward": 1.4506222009658813, "reward_std": 0.34449562430381775, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.32069632411003113, "rewards/EvidenceHallucination/std": 0.4599875807762146, "rewards/Evidence_Num_Record/mean": 3.809523820877075, "rewards/Evidence_Num_Record/std": 0.7726449966430664, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3531496524810791, "rewards/VideoAccuracy/std": 0.42197105288505554, "step": 876, "train_speed(iter/s)": 0.091264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 625.0, "completions/mean_length": 461.73809814453125, "completions/min_length": 349.0, "entropy/max": 0.578125, "entropy/mean": 0.412109375, "entropy/min": 0.29296875, "epoch": 0.877, "grad_norm": 1.1481544117424332, "kl": 0.287109375, "learning_rate": 7.521230050747085e-08, "loss": 0.002876377198845148, "memory(GiB)": 137.41, "reward": 1.7416545152664185, "reward_std": 0.2787608802318573, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3635919392108917, "rewards/EvidenceHallucination/std": 0.47230517864227295, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.7589956521987915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.568936288356781, "rewards/VideoAccuracy/std": 0.5389463901519775, "step": 877, "train_speed(iter/s)": 0.090939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/mean_length": 621.452392578125, "completions/min_length": 351.0, "entropy/max": 0.63671875, "entropy/mean": 0.345703125, "entropy/min": 0.125, "epoch": 0.878, "grad_norm": 0.9383968788323271, "kl": 0.1962890625, "learning_rate": 7.400956061361974e-08, "loss": 0.002009030431509018, "memory(GiB)": 137.41, "reward": 1.842140793800354, "reward_std": 0.17816469073295593, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45321619510650635, "rewards/EvidenceHallucination/std": 0.46109721064567566, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.0155584812164307, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.6181641817092896, "rewards/VideoAccuracy/std": 0.47660982608795166, "step": 878, "train_speed(iter/s)": 0.09053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/mean_length": 491.7857360839844, "completions/min_length": 352.0, "entropy/max": 0.78125, "entropy/mean": 0.421875, "entropy/min": 0.22265625, "epoch": 0.879, "grad_norm": 1.1225403470000348, "kl": 0.2490234375, "learning_rate": 7.281614543321269e-08, "loss": 0.0025496752932667732, "memory(GiB)": 137.41, "reward": 1.6172040700912476, "reward_std": 0.21314892172813416, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3995124399662018, "rewards/EvidenceHallucination/std": 0.45061445236206055, "rewards/Evidence_Num_Record/mean": 4.785714149475098, "rewards/Evidence_Num_Record/std": 1.5227657556533813, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.47063493728637695, "rewards/VideoAccuracy/std": 0.43577468395233154, "step": 879, "train_speed(iter/s)": 0.090139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 659.0, "completions/mean_length": 468.19049072265625, "completions/min_length": 347.0, "entropy/max": 0.6640625, "entropy/mean": 0.453125, "entropy/min": 0.2421875, "epoch": 0.88, "grad_norm": 1.3583495100760876, "kl": 0.28125, "learning_rate": 7.163206698392742e-08, "loss": 0.0028377859853208065, "memory(GiB)": 137.41, "reward": 1.78130304813385, "reward_std": 0.3314023017883301, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.564808189868927, "rewards/EvidenceHallucination/std": 0.47428396344184875, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.9236221313476562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6350079774856567, "rewards/VideoAccuracy/std": 0.3911062777042389, "step": 880, "train_speed(iter/s)": 0.089636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 890.0, "completions/mean_length": 610.2380981445312, "completions/min_length": 385.0, "entropy/max": 0.546875, "entropy/mean": 0.248046875, "entropy/min": 0.12255859375, "epoch": 0.881, "grad_norm": 0.9674632861169625, "kl": 0.1982421875, "learning_rate": 7.045733718942093e-08, "loss": 0.001996932551264763, "memory(GiB)": 137.41, "reward": 2.190370798110962, "reward_std": 0.09444907307624817, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5754888653755188, "rewards/EvidenceHallucination/std": 0.42112043499946594, "rewards/Evidence_Num_Record/mean": 4.333333492279053, "rewards/Evidence_Num_Record/std": 0.9283257722854614, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8752728700637817, "rewards/VideoAccuracy/std": 0.47302642464637756, "step": 881, "train_speed(iter/s)": 0.089201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 969.0, "completions/mean_length": 537.0238037109375, "completions/min_length": 375.0, "entropy/max": 1.0078125, "entropy/mean": 0.462890625, "entropy/min": 0.287109375, "epoch": 0.882, "grad_norm": 1.0925018062641088, "kl": 0.2470703125, "learning_rate": 6.929196787920898e-08, "loss": 0.0025138126220554113, "memory(GiB)": 137.41, "reward": 1.5892995595932007, "reward_std": 0.17472247779369354, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4005925953388214, "rewards/EvidenceHallucination/std": 0.4204840660095215, "rewards/Evidence_Num_Record/mean": 5.309524059295654, "rewards/Evidence_Num_Record/std": 1.7598239183425903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.44727635383605957, "rewards/VideoAccuracy/std": 0.46331316232681274, "step": 882, "train_speed(iter/s)": 0.088791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 680.0, "completions/mean_length": 441.5952453613281, "completions/min_length": 271.0, "entropy/max": 0.60546875, "entropy/mean": 0.474609375, "entropy/min": 0.337890625, "epoch": 0.883, "grad_norm": 1.3718549355783753, "kl": 0.283203125, "learning_rate": 6.813597078854771e-08, "loss": 0.0028338287957012653, "memory(GiB)": 137.41, "reward": 1.533994197845459, "reward_std": 0.32422956824302673, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.40407299995422363, "rewards/EvidenceHallucination/std": 0.4569137990474701, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.9606062173843384, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4531796872615814, "rewards/VideoAccuracy/std": 0.4807124733924866, "step": 883, "train_speed(iter/s)": 0.088476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 761.0, "completions/mean_length": 475.76190185546875, "completions/min_length": 269.0, "entropy/max": 0.54296875, "entropy/mean": 0.349609375, "entropy/min": 0.1494140625, "epoch": 0.884, "grad_norm": 1.1966819555119639, "kl": 0.271484375, "learning_rate": 6.698935755831491e-08, "loss": 0.002740682801231742, "memory(GiB)": 137.41, "reward": 2.1745049953460693, "reward_std": 0.1737610399723053, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5192246437072754, "rewards/EvidenceHallucination/std": 0.4441063702106476, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.9422956705093384, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.9039933681488037, "rewards/VideoAccuracy/std": 0.4034653306007385, "step": 884, "train_speed(iter/s)": 0.088056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/mean_length": 516.2380981445312, "completions/min_length": 331.0, "entropy/max": 0.921875, "entropy/mean": 0.447265625, "entropy/min": 0.1513671875, "epoch": 0.885, "grad_norm": 1.2399208440837544, "kl": 0.2333984375, "learning_rate": 6.585213973489334e-08, "loss": 0.002357909455895424, "memory(GiB)": 137.41, "reward": 2.129850387573242, "reward_std": 0.20503221452236176, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6250701546669006, "rewards/EvidenceHallucination/std": 0.4147346615791321, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.3236435651779175, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.9048364162445068, "rewards/VideoAccuracy/std": 0.3743656873703003, "step": 885, "train_speed(iter/s)": 0.087565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 770.0, "completions/mean_length": 458.1190490722656, "completions/min_length": 276.0, "entropy/max": 0.58984375, "entropy/mean": 0.416015625, "entropy/min": 0.28125, "epoch": 0.886, "grad_norm": 1.5532292659103142, "kl": 0.275390625, "learning_rate": 6.47243287700534e-08, "loss": 0.002769982907921076, "memory(GiB)": 137.41, "reward": 1.8790358304977417, "reward_std": 0.3673816919326782, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6465736031532288, "rewards/EvidenceHallucination/std": 0.41805994510650635, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 1.0373399257659912, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7163876891136169, "rewards/VideoAccuracy/std": 0.3747093677520752, "step": 886, "train_speed(iter/s)": 0.087111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 462.9285888671875, "completions/min_length": 252.0, "entropy/max": 1.0078125, "entropy/mean": 0.453125, "entropy/min": 0.232421875, "epoch": 0.887, "grad_norm": 1.463237188359426, "kl": 0.2890625, "learning_rate": 6.36059360208394e-08, "loss": 0.0029151299968361855, "memory(GiB)": 137.41, "reward": 2.2597317695617676, "reward_std": 0.24623370170593262, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7909353375434875, "rewards/EvidenceHallucination/std": 0.28417184948921204, "rewards/Evidence_Num_Record/mean": 3.9285714626312256, "rewards/Evidence_Num_Record/std": 1.134661316871643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 1.00154447555542, "rewards/VideoAccuracy/std": 0.3549065887928009, "step": 887, "train_speed(iter/s)": 0.086794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 729.0, "completions/mean_length": 519.1428833007812, "completions/min_length": 365.0, "entropy/max": 2.015625, "entropy/mean": 0.5, "entropy/min": 0.1474609375, "epoch": 0.888, "grad_norm": 0.9038730392780432, "kl": 0.21484375, "learning_rate": 6.249697274945376e-08, "loss": 0.0021830867044627666, "memory(GiB)": 137.41, "reward": 2.1366453170776367, "reward_std": 0.11492623388767242, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5206296443939209, "rewards/EvidenceHallucination/std": 0.4495990574359894, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 1.187673807144165, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.8991861343383789, "rewards/VideoAccuracy/std": 0.43332386016845703, "step": 888, "train_speed(iter/s)": 0.086372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 663.0, "completions/mean_length": 459.21429443359375, "completions/min_length": 336.0, "entropy/max": 0.6015625, "entropy/mean": 0.431640625, "entropy/min": 0.251953125, "epoch": 0.889, "grad_norm": 1.2730359639274378, "kl": 0.2734375, "learning_rate": 6.139745012314424e-08, "loss": 0.0027574487030506134, "memory(GiB)": 137.41, "reward": 1.5221045017242432, "reward_std": 0.2713756859302521, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29796910285949707, "rewards/EvidenceHallucination/std": 0.4305155873298645, "rewards/Evidence_Num_Record/mean": 4.095238208770752, "rewards/Evidence_Num_Record/std": 1.1220521926879883, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.40536782145500183, "rewards/VideoAccuracy/std": 0.4380069673061371, "step": 889, "train_speed(iter/s)": 0.08605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/mean_length": 459.0952453613281, "completions/min_length": 315.0, "entropy/max": 0.63671875, "entropy/mean": 0.451171875, "entropy/min": 0.28515625, "epoch": 0.89, "grad_norm": 1.322065101605198, "kl": 0.287109375, "learning_rate": 6.030737921409168e-08, "loss": 0.0028813458047807217, "memory(GiB)": 137.41, "reward": 1.7936220169067383, "reward_std": 0.32686707377433777, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5598907470703125, "rewards/EvidenceHallucination/std": 0.47290104627609253, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.6270147562026978, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6483103632926941, "rewards/VideoAccuracy/std": 0.5596532225608826, "step": 890, "train_speed(iter/s)": 0.085746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 854.0, "completions/mean_length": 546.6190795898438, "completions/min_length": 354.0, "entropy/max": 0.53515625, "entropy/mean": 0.2890625, "entropy/min": 0.1650390625, "epoch": 0.891, "grad_norm": 1.110845425550562, "kl": 0.2119140625, "learning_rate": 5.922677099929785e-08, "loss": 0.0021479499991983175, "memory(GiB)": 137.41, "reward": 2.1060032844543457, "reward_std": 0.19177255034446716, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5316746234893799, "rewards/EvidenceHallucination/std": 0.4688880145549774, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7083376049995422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.8044303059577942, "rewards/VideoAccuracy/std": 0.47716856002807617, "step": 891, "train_speed(iter/s)": 0.085401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 656.0, "completions/mean_length": 490.6190490722656, "completions/min_length": 349.0, "entropy/max": 0.83984375, "entropy/mean": 0.4765625, "entropy/min": 0.248046875, "epoch": 0.892, "grad_norm": 1.217363528182602, "kl": 0.271484375, "learning_rate": 5.815563636047538e-08, "loss": 0.0027382380794733763, "memory(GiB)": 137.41, "reward": 1.88263738155365, "reward_std": 0.06644676625728607, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6405651569366455, "rewards/EvidenceHallucination/std": 0.40935197472572327, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 0.9912509322166443, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.6926195621490479, "rewards/VideoAccuracy/std": 0.3781779706478119, "step": 892, "train_speed(iter/s)": 0.085099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 685.0, "completions/mean_length": 404.0238037109375, "completions/min_length": 284.0, "entropy/max": 0.9375, "entropy/mean": 0.44140625, "entropy/min": 0.296875, "epoch": 0.893, "grad_norm": 1.3202198516584376, "kl": 0.28515625, "learning_rate": 5.709398608393834e-08, "loss": 0.0028688169550150633, "memory(GiB)": 137.41, "reward": 1.6975760459899902, "reward_std": 0.41498932242393494, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4748455286026001, "rewards/EvidenceHallucination/std": 0.46483904123306274, "rewards/Evidence_Num_Record/mean": 3.3333334922790527, "rewards/Evidence_Num_Record/std": 0.845841109752655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6026068925857544, "rewards/VideoAccuracy/std": 0.44790560007095337, "step": 893, "train_speed(iter/s)": 0.084791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/mean_length": 461.71429443359375, "completions/min_length": 292.0, "entropy/max": 0.5390625, "entropy/mean": 0.3515625, "entropy/min": 0.19140625, "epoch": 0.894, "grad_norm": 1.077120693480762, "kl": 0.259765625, "learning_rate": 5.604183086049341e-08, "loss": 0.0026304530911147594, "memory(GiB)": 137.41, "reward": 2.1575303077697754, "reward_std": 0.07329066842794418, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5564228296279907, "rewards/EvidenceHallucination/std": 0.43415695428848267, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 0.9385906457901001, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8795791864395142, "rewards/VideoAccuracy/std": 0.6343855261802673, "step": 894, "train_speed(iter/s)": 0.084489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 732.0, "completions/mean_length": 502.66668701171875, "completions/min_length": 314.0, "entropy/max": 1.4375, "entropy/mean": 0.404296875, "entropy/min": 0.1552734375, "epoch": 0.895, "grad_norm": 1.0951229106785407, "kl": 0.2412109375, "learning_rate": 5.499918128533154e-08, "loss": 0.0024560079909861088, "memory(GiB)": 137.41, "reward": 1.926005244255066, "reward_std": 0.12339666485786438, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5627981424331665, "rewards/EvidenceHallucination/std": 0.4098535180091858, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.9358023405075073, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.7134456038475037, "rewards/VideoAccuracy/std": 0.3909820020198822, "step": 895, "train_speed(iter/s)": 0.084102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 590.0, "completions/mean_length": 455.73809814453125, "completions/min_length": 345.0, "entropy/max": 0.5625, "entropy/mean": 0.404296875, "entropy/min": 0.296875, "epoch": 0.896, "grad_norm": 1.2009173901435557, "kl": 0.294921875, "learning_rate": 5.39660478579228e-08, "loss": 0.002986327512189746, "memory(GiB)": 137.41, "reward": 1.6633399724960327, "reward_std": 0.07493476569652557, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5234918594360352, "rewards/EvidenceHallucination/std": 0.48352640867233276, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.5808600187301636, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.5348320007324219, "rewards/VideoAccuracy/std": 0.462007999420166, "step": 896, "train_speed(iter/s)": 0.083796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 630.0, "completions/mean_length": 468.5714416503906, "completions/min_length": 321.0, "entropy/max": 0.69921875, "entropy/mean": 0.4140625, "entropy/min": 0.283203125, "epoch": 0.897, "grad_norm": 1.4394225019863833, "kl": 0.2890625, "learning_rate": 5.294244098190925e-08, "loss": 0.0029065608978271484, "memory(GiB)": 137.41, "reward": 2.087952136993408, "reward_std": 0.3857279419898987, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6915156841278076, "rewards/EvidenceHallucination/std": 0.3970063030719757, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.7624308466911316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8496488928794861, "rewards/VideoAccuracy/std": 0.465589314699173, "step": 897, "train_speed(iter/s)": 0.083513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 970.0, "completions/mean_length": 542.6428833007812, "completions/min_length": 407.0, "entropy/max": 0.703125, "entropy/mean": 0.294921875, "entropy/min": 0.134765625, "epoch": 0.898, "grad_norm": 0.9412662227413006, "kl": 0.2197265625, "learning_rate": 5.192837096500058e-08, "loss": 0.0022464762441813946, "memory(GiB)": 137.41, "reward": 1.9782623052597046, "reward_std": 0.1456795185804367, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4908020794391632, "rewards/EvidenceHallucination/std": 0.42891359329223633, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 1.445084571838379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7467685341835022, "rewards/VideoAccuracy/std": 0.4523145854473114, "step": 898, "train_speed(iter/s)": 0.083157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 808.0, "completions/mean_length": 483.8095397949219, "completions/min_length": 317.0, "entropy/max": 0.8046875, "entropy/mean": 0.44140625, "entropy/min": 0.28515625, "epoch": 0.899, "grad_norm": 1.2302754346798352, "kl": 0.263671875, "learning_rate": 5.092384801887073e-08, "loss": 0.002651178278028965, "memory(GiB)": 137.41, "reward": 1.5938938856124878, "reward_std": 0.21898728609085083, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.41778501868247986, "rewards/EvidenceHallucination/std": 0.4507313072681427, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.1160845756530762, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.45319414138793945, "rewards/VideoAccuracy/std": 0.42926546931266785, "step": 899, "train_speed(iter/s)": 0.08286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 676.0, "completions/mean_length": 478.23809814453125, "completions/min_length": 337.0, "entropy/max": 0.60546875, "entropy/mean": 0.44921875, "entropy/min": 0.302734375, "epoch": 0.9, "grad_norm": 1.3553010967243564, "kl": 0.28125, "learning_rate": 4.992888225905467e-08, "loss": 0.002824255730956793, "memory(GiB)": 137.41, "reward": 1.6197789907455444, "reward_std": 0.23516342043876648, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3931371569633484, "rewards/EvidenceHallucination/std": 0.483131468296051, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.2699053287506104, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2380952388048172, "rewards/HonestTime/std": 0.43108054995536804, "rewards/VideoAccuracy/mean": 0.49353253841400146, "rewards/VideoAccuracy/std": 0.48202645778656006, "step": 900, "train_speed(iter/s)": 0.082597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 904.0, "completions/mean_length": 549.8333740234375, "completions/min_length": 338.0, "entropy/max": 0.52734375, "entropy/mean": 0.330078125, "entropy/min": 0.1298828125, "epoch": 0.901, "grad_norm": 1.0452126125239998, "kl": 0.2138671875, "learning_rate": 4.8943483704846465e-08, "loss": 0.002178219147026539, "memory(GiB)": 137.41, "reward": 2.160017251968384, "reward_std": 0.20481643080711365, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6286323070526123, "rewards/EvidenceHallucination/std": 0.39143964648246765, "rewards/Evidence_Num_Record/mean": 3.6666667461395264, "rewards/Evidence_Num_Record/std": 0.7543909549713135, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8342907428741455, "rewards/VideoAccuracy/std": 0.4933658540248871, "step": 901, "train_speed(iter/s)": 0.081945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/mean_length": 483.90478515625, "completions/min_length": 358.0, "entropy/max": 1.0703125, "entropy/mean": 0.4765625, "entropy/min": 0.287109375, "epoch": 0.902, "grad_norm": 1.2072911926562326, "kl": 0.28515625, "learning_rate": 4.796766227919857e-08, "loss": 0.0028953691944479942, "memory(GiB)": 137.41, "reward": 1.8647540807724, "reward_std": 0.21534258127212524, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6785789132118225, "rewards/EvidenceHallucination/std": 0.4169284403324127, "rewards/Evidence_Num_Record/mean": 4.857142925262451, "rewards/Evidence_Num_Record/std": 1.704676866531372, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.6623717546463013, "rewards/VideoAccuracy/std": 0.4006807208061218, "step": 902, "train_speed(iter/s)": 0.081629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 869.0, "completions/mean_length": 456.9761962890625, "completions/min_length": 260.0, "entropy/max": 0.55078125, "entropy/mean": 0.431640625, "entropy/min": 0.294921875, "epoch": 0.903, "grad_norm": 1.0096162813731613, "kl": 0.255859375, "learning_rate": 4.700142780862204e-08, "loss": 0.0025641187094151974, "memory(GiB)": 137.41, "reward": 1.1655939817428589, "reward_std": 0.25843915343284607, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.1373104751110077, "rewards/EvidenceHallucination/std": 0.3412381708621979, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 1.6115148067474365, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.13813185691833496, "rewards/VideoAccuracy/std": 0.3316902816295624, "step": 903, "train_speed(iter/s)": 0.081432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1014.0, "completions/mean_length": 514.5952758789062, "completions/min_length": 322.0, "entropy/max": 0.52734375, "entropy/mean": 0.326171875, "entropy/min": 0.1279296875, "epoch": 0.904, "grad_norm": 1.1391754145138482, "kl": 0.26171875, "learning_rate": 4.6044790023087364e-08, "loss": 0.0026426189579069614, "memory(GiB)": 137.41, "reward": 2.224207878112793, "reward_std": 0.1719508022069931, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6544135808944702, "rewards/EvidenceHallucination/std": 0.4259692430496216, "rewards/Evidence_Num_Record/mean": 3.8809523582458496, "rewards/Evidence_Num_Record/std": 0.9160460829734802, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.785714328289032, "rewards/HonestTime/std": 0.41529974341392517, "rewards/VideoAccuracy/mean": 0.9361823797225952, "rewards/VideoAccuracy/std": 0.519324541091919, "step": 904, "train_speed(iter/s)": 0.081137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 740.0, "completions/mean_length": 478.0476379394531, "completions/min_length": 306.0, "entropy/max": 1.109375, "entropy/mean": 0.490234375, "entropy/min": 0.11669921875, "epoch": 0.905, "grad_norm": 1.1137963129761075, "kl": 0.2373046875, "learning_rate": 4.5097758555926127e-08, "loss": 0.0024147345684468746, "memory(GiB)": 137.41, "reward": 2.0289058685302734, "reward_std": 0.09318455308675766, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.563530683517456, "rewards/EvidenceHallucination/std": 0.43566638231277466, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 1.1854714155197144, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8161997199058533, "rewards/VideoAccuracy/std": 0.42606857419013977, "step": 905, "train_speed(iter/s)": 0.080757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 742.0, "completions/mean_length": 457.8809509277344, "completions/min_length": 305.0, "entropy/max": 0.61328125, "entropy/mean": 0.453125, "entropy/min": 0.31640625, "epoch": 0.906, "grad_norm": 1.4296618106636225, "kl": 0.29296875, "learning_rate": 4.416034294373472e-08, "loss": 0.002946004271507263, "memory(GiB)": 137.41, "reward": 1.829820156097412, "reward_std": 0.3908424377441406, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.607211172580719, "rewards/EvidenceHallucination/std": 0.4409499764442444, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 0.9358022809028625, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.6464732885360718, "rewards/VideoAccuracy/std": 0.39280256628990173, "step": 906, "train_speed(iter/s)": 0.080474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/mean_length": 483.76190185546875, "completions/min_length": 349.0, "entropy/max": 0.59765625, "entropy/mean": 0.4296875, "entropy/min": 0.2578125, "epoch": 0.907, "grad_norm": 1.108937079876599, "kl": 0.279296875, "learning_rate": 4.323255262627845e-08, "loss": 0.0028140272479504347, "memory(GiB)": 137.41, "reward": 1.8778079748153687, "reward_std": 0.151490718126297, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.48161962628364563, "rewards/EvidenceHallucination/std": 0.46850135922431946, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.134661316871643, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6814842224121094, "rewards/VideoAccuracy/std": 0.6318930387496948, "step": 907, "train_speed(iter/s)": 0.080233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/mean_length": 588.4761962890625, "completions/min_length": 369.0, "entropy/max": 1.0625, "entropy/mean": 0.380859375, "entropy/min": 0.1474609375, "epoch": 0.908, "grad_norm": 1.101879433193266, "kl": 0.2001953125, "learning_rate": 4.231439694639483e-08, "loss": 0.002064464846625924, "memory(GiB)": 137.41, "reward": 2.2261457443237305, "reward_std": 0.17315956950187683, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6907598376274109, "rewards/EvidenceHallucination/std": 0.3077680766582489, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.7419134378433228, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9546603560447693, "rewards/VideoAccuracy/std": 0.26798194646835327, "step": 908, "train_speed(iter/s)": 0.079857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 670.0, "completions/mean_length": 482.0238037109375, "completions/min_length": 348.0, "entropy/max": 0.58203125, "entropy/mean": 0.439453125, "entropy/min": 0.2890625, "epoch": 0.909, "grad_norm": 0.9415331910374828, "kl": 0.271484375, "learning_rate": 4.140588514990162e-08, "loss": 0.0027337963692843914, "memory(GiB)": 137.41, "reward": 1.7474002838134766, "reward_std": 0.10227378457784653, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5650557279586792, "rewards/EvidenceHallucination/std": 0.4620368182659149, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.192309856414795, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.5772462487220764, "rewards/VideoAccuracy/std": 0.450177937746048, "step": 909, "train_speed(iter/s)": 0.079625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 588.0, "completions/mean_length": 458.5476379394531, "completions/min_length": 309.0, "entropy/max": 0.5390625, "entropy/mean": 0.44140625, "entropy/min": 0.31640625, "epoch": 0.91, "grad_norm": 1.4538286084749623, "kl": 0.28515625, "learning_rate": 4.050702638550274e-08, "loss": 0.0028566864784806967, "memory(GiB)": 137.41, "reward": 1.526552677154541, "reward_std": 0.2703312933444977, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3294943869113922, "rewards/EvidenceHallucination/std": 0.4306771755218506, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7083376049995422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.4273204505443573, "rewards/VideoAccuracy/std": 0.44682276248931885, "step": 910, "train_speed(iter/s)": 0.07938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 944.0, "completions/mean_length": 599.5238037109375, "completions/min_length": 338.0, "entropy/max": 0.5234375, "entropy/mean": 0.287109375, "entropy/min": 0.10693359375, "epoch": 0.911, "grad_norm": 0.9722409780067869, "kl": 0.2060546875, "learning_rate": 3.9617829704695625e-08, "loss": 0.002105217892676592, "memory(GiB)": 137.41, "reward": 2.0139424800872803, "reward_std": 0.14438004791736603, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5142771005630493, "rewards/EvidenceHallucination/std": 0.44988352060317993, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 0.771516740322113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 0.7158487439155579, "rewards/VideoAccuracy/std": 0.4261232018470764, "step": 911, "train_speed(iter/s)": 0.079086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 654.0, "completions/mean_length": 473.40478515625, "completions/min_length": 231.0, "entropy/max": 1.0625, "entropy/mean": 0.5078125, "entropy/min": 0.1923828125, "epoch": 0.912, "grad_norm": 1.0951489120309386, "kl": 0.267578125, "learning_rate": 3.87383040616811e-08, "loss": 0.0027020308189094067, "memory(GiB)": 137.41, "reward": 1.6963773965835571, "reward_std": 0.10742451995611191, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5140460133552551, "rewards/EvidenceHallucination/std": 0.4591914415359497, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.0638718605041504, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011404514313, "rewards/VideoAccuracy/mean": 0.5316633582115173, "rewards/VideoAccuracy/std": 0.4423540234565735, "step": 912, "train_speed(iter/s)": 0.078811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 587.0, "completions/mean_length": 423.5238037109375, "completions/min_length": 301.0, "entropy/max": 0.703125, "entropy/mean": 0.4609375, "entropy/min": 0.318359375, "epoch": 0.913, "grad_norm": 1.2959113487517442, "kl": 0.283203125, "learning_rate": 3.78684583132729e-08, "loss": 0.0028616702184081078, "memory(GiB)": 137.41, "reward": 1.6041069030761719, "reward_std": 0.16784727573394775, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4477301239967346, "rewards/EvidenceHallucination/std": 0.4808220863342285, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.8035924434661865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5145609974861145, "rewards/VideoAccuracy/std": 0.4534718692302704, "step": 913, "train_speed(iter/s)": 0.078499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 880.0, "completions/mean_length": 505.6190490722656, "completions/min_length": 274.0, "entropy/max": 0.5, "entropy/mean": 0.361328125, "entropy/min": 0.1708984375, "epoch": 0.914, "grad_norm": 1.292160084245574, "kl": 0.2578125, "learning_rate": 3.700830121880771e-08, "loss": 0.002599894069135189, "memory(GiB)": 137.41, "reward": 2.0603015422821045, "reward_std": 0.19325129687786102, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.47536569833755493, "rewards/EvidenceHallucination/std": 0.46217137575149536, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.9606061577796936, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7985616326332092, "rewards/VideoAccuracy/std": 0.5646498203277588, "step": 914, "train_speed(iter/s)": 0.078242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 831.0, "completions/mean_length": 504.1428527832031, "completions/min_length": 338.0, "entropy/max": 2.0, "entropy/mean": 0.43359375, "entropy/min": 0.1767578125, "epoch": 0.915, "grad_norm": 0.8688230699508614, "kl": 0.23828125, "learning_rate": 3.615784144005796e-08, "loss": 0.002415733877569437, "memory(GiB)": 137.41, "reward": 1.6662129163742065, "reward_std": 0.17078039050102234, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3501947224140167, "rewards/EvidenceHallucination/std": 0.45579585433006287, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.2137902975082397, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.4961739182472229, "rewards/VideoAccuracy/std": 0.4929002821445465, "step": 915, "train_speed(iter/s)": 0.07793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 661.0, "completions/mean_length": 462.7857360839844, "completions/min_length": 358.0, "entropy/max": 1.2109375, "entropy/mean": 0.455078125, "entropy/min": 0.25390625, "epoch": 0.916, "grad_norm": 1.0376100448719379, "kl": 0.279296875, "learning_rate": 3.531708754114437e-08, "loss": 0.0028096698224544525, "memory(GiB)": 137.41, "reward": 1.4328473806381226, "reward_std": 0.23865549266338348, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3442094027996063, "rewards/EvidenceHallucination/std": 0.44941267371177673, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.6608339548110962, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.3306720554828644, "rewards/VideoAccuracy/std": 0.4214882254600525, "step": 916, "train_speed(iter/s)": 0.077643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 578.0, "completions/mean_length": 447.6428527832031, "completions/min_length": 294.0, "entropy/max": 0.52734375, "entropy/mean": 0.41015625, "entropy/min": 0.271484375, "epoch": 0.917, "grad_norm": 1.3274216254416777, "kl": 0.30078125, "learning_rate": 3.448604798844912e-08, "loss": 0.003033221699297428, "memory(GiB)": 137.41, "reward": 2.051182985305786, "reward_std": 0.19719287753105164, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6522086262702942, "rewards/EvidenceHallucination/std": 0.4044216573238373, "rewards/Evidence_Num_Record/mean": 3.809523820877075, "rewards/Evidence_Num_Record/std": 0.7404050827026367, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.8255032896995544, "rewards/VideoAccuracy/std": 0.5068250894546509, "step": 917, "train_speed(iter/s)": 0.077421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1117.0, "completions/mean_length": 627.1428833007812, "completions/min_length": 343.0, "entropy/max": 1.0546875, "entropy/mean": 0.36328125, "entropy/min": 0.1513671875, "epoch": 0.918, "grad_norm": 1.1210837527492852, "kl": 0.2041015625, "learning_rate": 3.366473115053148e-08, "loss": 0.0020890242885798216, "memory(GiB)": 137.41, "reward": 1.8940800428390503, "reward_std": 0.18140888214111328, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43879175186157227, "rewards/EvidenceHallucination/std": 0.4503001570701599, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 0.9323829412460327, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.6777501702308655, "rewards/VideoAccuracy/std": 0.43920210003852844, "step": 918, "train_speed(iter/s)": 0.077142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 729.0, "completions/mean_length": 472.3571472167969, "completions/min_length": 247.0, "entropy/max": 0.6875, "entropy/mean": 0.44921875, "entropy/min": 0.251953125, "epoch": 0.919, "grad_norm": 1.5327354185688424, "kl": 0.271484375, "learning_rate": 3.285314529804295e-08, "loss": 0.0027872147038578987, "memory(GiB)": 137.41, "reward": 2.064453601837158, "reward_std": 0.09906556457281113, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7417626976966858, "rewards/EvidenceHallucination/std": 0.3818971514701843, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 1.353148341178894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.825624942779541, "rewards/VideoAccuracy/std": 0.22875171899795532, "step": 919, "train_speed(iter/s)": 0.076839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 618.0, "completions/mean_length": 463.21429443359375, "completions/min_length": 319.0, "entropy/max": 0.69140625, "entropy/mean": 0.458984375, "entropy/min": 0.33984375, "epoch": 0.92, "grad_norm": 1.40761268214982, "kl": 0.283203125, "learning_rate": 3.205129860364375e-08, "loss": 0.0028404868207871914, "memory(GiB)": 137.41, "reward": 1.4765101671218872, "reward_std": 0.26599860191345215, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3487204313278198, "rewards/EvidenceHallucination/std": 0.4532499313354492, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 0.7513054609298706, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.3781946003437042, "rewards/VideoAccuracy/std": 0.37577101588249207, "step": 920, "train_speed(iter/s)": 0.076347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 835.0, "completions/mean_length": 548.2619018554688, "completions/min_length": 330.0, "entropy/max": 0.490234375, "entropy/mean": 0.28515625, "entropy/min": 0.1337890625, "epoch": 0.921, "grad_norm": 1.039517250813372, "kl": 0.2158203125, "learning_rate": 3.125919914192143e-08, "loss": 0.002195358509197831, "memory(GiB)": 137.41, "reward": 2.1239235401153564, "reward_std": 0.23332199454307556, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.576139509677887, "rewards/EvidenceHallucination/std": 0.44498687982559204, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.4915073812007904, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.8086954951286316, "rewards/VideoAccuracy/std": 0.37584733963012695, "step": 921, "train_speed(iter/s)": 0.076084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 906.0, "completions/mean_length": 505.8571472167969, "completions/min_length": 328.0, "entropy/max": 0.703125, "entropy/mean": 0.4375, "entropy/min": 0.251953125, "epoch": 0.922, "grad_norm": 1.4127220858797223, "kl": 0.251953125, "learning_rate": 3.0476854889308734e-08, "loss": 0.0025830313097685575, "memory(GiB)": 137.41, "reward": 1.8330525159835815, "reward_std": 0.3108817934989929, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5397878289222717, "rewards/EvidenceHallucination/std": 0.44190317392349243, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.1001002788543701, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.6298568844795227, "rewards/VideoAccuracy/std": 0.3939414322376251, "step": 922, "train_speed(iter/s)": 0.075874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 985.0, "completions/mean_length": 515.357177734375, "completions/min_length": 321.0, "entropy/max": 2.046875, "entropy/mean": 0.498046875, "entropy/min": 0.259765625, "epoch": 0.923, "grad_norm": 0.9224311505711394, "kl": 0.2421875, "learning_rate": 2.9704273724003526e-08, "loss": 0.002463304903358221, "memory(GiB)": 137.41, "reward": 1.363296627998352, "reward_std": 0.09228571504354477, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.2945178747177124, "rewards/EvidenceHallucination/std": 0.44748246669769287, "rewards/Evidence_Num_Record/mean": 4.214285850524902, "rewards/Evidence_Num_Record/std": 0.9761975407600403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.30439308285713196, "rewards/VideoAccuracy/std": 0.44341716170310974, "step": 923, "train_speed(iter/s)": 0.075482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 656.0, "completions/mean_length": 489.16668701171875, "completions/min_length": 361.0, "entropy/max": 0.62890625, "entropy/mean": 0.357421875, "entropy/min": 0.138671875, "epoch": 0.924, "grad_norm": 1.0761774381857239, "kl": 0.263671875, "learning_rate": 2.8941463425889767e-08, "loss": 0.0026684931945055723, "memory(GiB)": 137.41, "reward": 2.0897278785705566, "reward_std": 0.07733561098575592, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.611544132232666, "rewards/EvidenceHallucination/std": 0.44709569215774536, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8785083889961243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.8007524609565735, "rewards/VideoAccuracy/std": 0.5321916937828064, "step": 924, "train_speed(iter/s)": 0.07527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 790.0, "completions/mean_length": 510.5, "completions/min_length": 271.0, "entropy/max": 0.8984375, "entropy/mean": 0.39453125, "entropy/min": 0.138671875, "epoch": 0.925, "grad_norm": 1.2208569006575813, "kl": 0.2294921875, "learning_rate": 2.8188431676458345e-08, "loss": 0.0023357027675956488, "memory(GiB)": 137.41, "reward": 2.259730815887451, "reward_std": 0.08099796622991562, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.864700436592102, "rewards/EvidenceHallucination/std": 0.21934209764003754, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.5151193141937256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9534574151039124, "rewards/VideoAccuracy/std": 0.24664339423179626, "step": 925, "train_speed(iter/s)": 0.074957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 743.0, "completions/mean_length": 487.5, "completions/min_length": 359.0, "entropy/max": 0.75, "entropy/mean": 0.455078125, "entropy/min": 0.24609375, "epoch": 0.926, "grad_norm": 1.0885052875433516, "kl": 0.275390625, "learning_rate": 2.7445186058730917e-08, "loss": 0.0027898214757442474, "memory(GiB)": 137.41, "reward": 1.572590947151184, "reward_std": 0.2691900134086609, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.35204336047172546, "rewards/EvidenceHallucination/std": 0.4198104441165924, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 1.399352788925171, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.4688488841056824, "rewards/VideoAccuracy/std": 0.451306015253067, "step": 926, "train_speed(iter/s)": 0.07473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 555.0, "completions/mean_length": 428.23809814453125, "completions/min_length": 257.0, "entropy/max": 0.578125, "entropy/mean": 0.453125, "entropy/min": 0.25390625, "epoch": 0.927, "grad_norm": 1.4724280433107912, "kl": 0.302734375, "learning_rate": 2.6711734057182413e-08, "loss": 0.0030424667056649923, "memory(GiB)": 137.41, "reward": 1.6641291379928589, "reward_std": 0.2164650410413742, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.37653282284736633, "rewards/EvidenceHallucination/std": 0.4643819332122803, "rewards/Evidence_Num_Record/mean": 3.6190476417541504, "rewards/Evidence_Num_Record/std": 0.8540400862693787, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.49358442425727844, "rewards/VideoAccuracy/std": 0.5288416147232056, "step": 927, "train_speed(iter/s)": 0.074533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 858.0, "completions/mean_length": 610.3095092773438, "completions/min_length": 376.0, "entropy/max": 0.578125, "entropy/mean": 0.302734375, "entropy/min": 0.09326171875, "epoch": 0.928, "grad_norm": 0.9018906561019351, "kl": 0.203125, "learning_rate": 2.5988083057666533e-08, "loss": 0.002057016594335437, "memory(GiB)": 137.41, "reward": 2.055840253829956, "reward_std": 0.09451095759868622, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.602540135383606, "rewards/EvidenceHallucination/std": 0.38646990060806274, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.2549552917480469, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496559262275696, "rewards/VideoAccuracy/mean": 0.8067607879638672, "rewards/VideoAccuracy/std": 0.40263330936431885, "step": 928, "train_speed(iter/s)": 0.074268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 757.0, "completions/mean_length": 434.21429443359375, "completions/min_length": 318.0, "entropy/max": 0.71484375, "entropy/mean": 0.39453125, "entropy/min": 0.228515625, "epoch": 0.929, "grad_norm": 1.0676244490347249, "kl": 0.275390625, "learning_rate": 2.5274240347340715e-08, "loss": 0.0027612752746790648, "memory(GiB)": 137.41, "reward": 1.6721405982971191, "reward_std": 0.13350006937980652, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5001934766769409, "rewards/EvidenceHallucination/std": 0.46985816955566406, "rewards/Evidence_Num_Record/mean": 3.738095283508301, "rewards/Evidence_Num_Record/std": 0.8850939869880676, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.47686392068862915, "rewards/VideoAccuracy/std": 0.3949807584285736, "step": 929, "train_speed(iter/s)": 0.073993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 777.0, "completions/mean_length": 483.16668701171875, "completions/min_length": 289.0, "entropy/max": 0.8671875, "entropy/mean": 0.4609375, "entropy/min": 0.275390625, "epoch": 0.93, "grad_norm": 1.2716437008238204, "kl": 0.26171875, "learning_rate": 2.4570213114592953e-08, "loss": 0.0026454541366547346, "memory(GiB)": 137.41, "reward": 1.5933008193969727, "reward_std": 0.3612544536590576, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4227463901042938, "rewards/EvidenceHallucination/std": 0.4742928743362427, "rewards/Evidence_Num_Record/mean": 4.428571701049805, "rewards/Evidence_Num_Record/std": 2.3073885440826416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.48017996549606323, "rewards/VideoAccuracy/std": 0.4840567708015442, "step": 930, "train_speed(iter/s)": 0.073792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1160.0, "completions/mean_length": 608.857177734375, "completions/min_length": 347.0, "entropy/max": 0.55078125, "entropy/mean": 0.28515625, "entropy/min": 0.123046875, "epoch": 0.931, "grad_norm": 1.0547840891430953, "kl": 0.2041015625, "learning_rate": 2.3876008448969977e-08, "loss": 0.0020937395747750998, "memory(GiB)": 137.41, "reward": 1.9991267919540405, "reward_std": 0.1399175077676773, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.26360222697257996, "rewards/EvidenceHallucination/std": 0.3506872057914734, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 0.7624309062957764, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9523809552192688, "rewards/HonestTime/std": 0.21554027497768402, "rewards/VideoAccuracy/mean": 0.7559301257133484, "rewards/VideoAccuracy/std": 0.5217366814613342, "step": 931, "train_speed(iter/s)": 0.073486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/mean_length": 515.452392578125, "completions/min_length": 286.0, "entropy/max": 0.953125, "entropy/mean": 0.44921875, "entropy/min": 0.23046875, "epoch": 0.932, "grad_norm": 1.5907493693109938, "kl": 0.267578125, "learning_rate": 2.3191633341104855e-08, "loss": 0.002785654505714774, "memory(GiB)": 137.41, "reward": 1.6841378211975098, "reward_std": 0.1959161013364792, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3821144700050354, "rewards/EvidenceHallucination/std": 0.435477614402771, "rewards/Evidence_Num_Record/mean": 4.904761791229248, "rewards/Evidence_Num_Record/std": 2.6486032009124756, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4285714328289032, "rewards/HonestTime/std": 0.5008702874183655, "rewards/VideoAccuracy/mean": 0.5220006108283997, "rewards/VideoAccuracy/std": 0.3990272283554077, "step": 932, "train_speed(iter/s)": 0.073237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1972.0, "completions/mean_length": 492.71429443359375, "completions/min_length": 309.0, "entropy/max": 0.80078125, "entropy/mean": 0.43359375, "entropy/min": 0.205078125, "epoch": 0.933, "grad_norm": 1.3993444707025986, "kl": 0.255859375, "learning_rate": 2.2517094682647396e-08, "loss": 0.0026667274069041014, "memory(GiB)": 137.41, "reward": 1.4650702476501465, "reward_std": 0.1610909253358841, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3479980528354645, "rewards/EvidenceHallucination/std": 0.45839816331863403, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 3.8258397579193115, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.39547058939933777, "rewards/VideoAccuracy/std": 0.4801982343196869, "step": 933, "train_speed(iter/s)": 0.072829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 688.0, "completions/mean_length": 515.3095092773438, "completions/min_length": 389.0, "entropy/max": 0.5703125, "entropy/mean": 0.369140625, "entropy/min": 0.1494140625, "epoch": 0.934, "grad_norm": 1.0225077561198843, "kl": 0.2421875, "learning_rate": 2.185239926619431e-08, "loss": 0.002430618042126298, "memory(GiB)": 137.41, "reward": 2.2757763862609863, "reward_std": 0.09640401601791382, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6919313073158264, "rewards/EvidenceHallucination/std": 0.42337656021118164, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.584348738193512, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9754852056503296, "rewards/VideoAccuracy/std": 0.46474310755729675, "step": 934, "train_speed(iter/s)": 0.072638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1123.0, "completions/mean_length": 543.047607421875, "completions/min_length": 321.0, "entropy/max": 1.21875, "entropy/mean": 0.4375, "entropy/min": 0.10302734375, "epoch": 0.935, "grad_norm": 1.019911305431324, "kl": 0.21875, "learning_rate": 2.119755378522137e-08, "loss": 0.0022481405176222324, "memory(GiB)": 137.41, "reward": 1.7865290641784668, "reward_std": 0.15708360075950623, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.43877112865448, "rewards/EvidenceHallucination/std": 0.4510277807712555, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.0101114511489868, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5654414892196655, "rewards/VideoAccuracy/std": 0.4084523320198059, "step": 935, "train_speed(iter/s)": 0.072413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 752.0, "completions/mean_length": 463.6428527832031, "completions/min_length": 250.0, "entropy/max": 0.77734375, "entropy/mean": 0.4765625, "entropy/min": 0.25390625, "epoch": 0.936, "grad_norm": 0.9792749729206832, "kl": 0.259765625, "learning_rate": 2.0552564834014797e-08, "loss": 0.0026516977231949568, "memory(GiB)": 137.41, "reward": 1.4407650232315063, "reward_std": 0.23377752304077148, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3174581229686737, "rewards/EvidenceHallucination/std": 0.45641613006591797, "rewards/Evidence_Num_Record/mean": 3.952381134033203, "rewards/Evidence_Num_Record/std": 1.3960288763046265, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.34870198369026184, "rewards/VideoAccuracy/std": 0.45109590888023376, "step": 936, "train_speed(iter/s)": 0.072194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 548.0, "completions/mean_length": 451.3809509277344, "completions/min_length": 359.0, "entropy/max": 0.5390625, "entropy/mean": 0.4453125, "entropy/min": 0.314453125, "epoch": 0.937, "grad_norm": 1.4266157784650282, "kl": 0.296875, "learning_rate": 1.9917438907606553e-08, "loss": 0.0029722112230956554, "memory(GiB)": 137.41, "reward": 2.01104474067688, "reward_std": 0.28446537256240845, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6532959938049316, "rewards/EvidenceHallucination/std": 0.4294026494026184, "rewards/Evidence_Num_Record/mean": 3.8333334922790527, "rewards/Evidence_Num_Record/std": 0.8811485767364502, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4523809552192688, "rewards/HonestTime/std": 0.503760576248169, "rewards/VideoAccuracy/mean": 0.7899093627929688, "rewards/VideoAccuracy/std": 0.4812328815460205, "step": 937, "train_speed(iter/s)": 0.072003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 932.0, "completions/mean_length": 586.5, "completions/min_length": 343.0, "entropy/max": 1.1328125, "entropy/mean": 0.341796875, "entropy/min": 0.1171875, "epoch": 0.938, "grad_norm": 0.826713359596132, "kl": 0.2060546875, "learning_rate": 1.9292182401707602e-08, "loss": 0.0020839450880885124, "memory(GiB)": 137.41, "reward": 1.9464994668960571, "reward_std": 0.05200798064470291, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5436687469482422, "rewards/EvidenceHallucination/std": 0.4424469470977783, "rewards/Evidence_Num_Record/mean": 4.238095283508301, "rewards/Evidence_Num_Record/std": 1.1001002788543701, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.7044323682785034, "rewards/VideoAccuracy/std": 0.43772637844085693, "step": 938, "train_speed(iter/s)": 0.071761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 877.0, "completions/mean_length": 446.9761962890625, "completions/min_length": 326.0, "entropy/max": 0.80859375, "entropy/mean": 0.421875, "entropy/min": 0.279296875, "epoch": 0.939, "grad_norm": 1.3265743791902576, "kl": 0.275390625, "learning_rate": 1.8676801612643954e-08, "loss": 0.0028159632347524166, "memory(GiB)": 137.41, "reward": 1.7494856119155884, "reward_std": 0.28992414474487305, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5868315100669861, "rewards/EvidenceHallucination/std": 0.46872034668922424, "rewards/Evidence_Num_Record/mean": 3.690476179122925, "rewards/Evidence_Num_Record/std": 0.6434698104858398, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.5654523968696594, "rewards/VideoAccuracy/std": 0.43522748351097107, "step": 939, "train_speed(iter/s)": 0.071502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 793.0, "completions/mean_length": 488.452392578125, "completions/min_length": 333.0, "entropy/max": 0.765625, "entropy/mean": 0.5, "entropy/min": 0.2578125, "epoch": 0.94, "grad_norm": 1.2902749148649342, "kl": 0.271484375, "learning_rate": 1.807130273729329e-08, "loss": 0.002739190822467208, "memory(GiB)": 137.41, "reward": 1.7307583093643188, "reward_std": 0.2225092649459839, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45415210723876953, "rewards/EvidenceHallucination/std": 0.46369168162345886, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.9093654155731201, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6065945625305176, "rewards/VideoAccuracy/std": 0.568202555179596, "step": 940, "train_speed(iter/s)": 0.071291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 923.0, "completions/mean_length": 569.90478515625, "completions/min_length": 362.0, "entropy/max": 0.546875, "entropy/mean": 0.271484375, "entropy/min": 0.1357421875, "epoch": 0.941, "grad_norm": 1.0144868951991712, "kl": 0.21484375, "learning_rate": 1.747569187302267e-08, "loss": 0.0021691806614398956, "memory(GiB)": 137.41, "reward": 2.290971517562866, "reward_std": 0.25219282507896423, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6649720668792725, "rewards/EvidenceHallucination/std": 0.35364794731140137, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.6917466521263123, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9579771161079407, "rewards/VideoAccuracy/std": 0.4417040944099426, "step": 941, "train_speed(iter/s)": 0.071053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 815.0, "completions/mean_length": 460.3333435058594, "completions/min_length": 305.0, "entropy/max": 0.6875, "entropy/mean": 0.443359375, "entropy/min": 0.298828125, "epoch": 0.942, "grad_norm": 1.3556173582357736, "kl": 0.2734375, "learning_rate": 1.68899750176269e-08, "loss": 0.0027679037302732468, "memory(GiB)": 137.41, "reward": 2.120345115661621, "reward_std": 0.043410640209913254, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8824974298477173, "rewards/EvidenceHallucination/std": 0.10965771973133087, "rewards/Evidence_Num_Record/mean": 4.0, "rewards/Evidence_Num_Record/std": 0.8834521770477295, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711870074272156, "rewards/VideoAccuracy/mean": 0.8771790266036987, "rewards/VideoAccuracy/std": 0.18448089063167572, "step": 942, "train_speed(iter/s)": 0.070841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 697.0, "completions/mean_length": 440.8333435058594, "completions/min_length": 304.0, "entropy/max": 0.59765625, "entropy/mean": 0.453125, "entropy/min": 0.3046875, "epoch": 0.943, "grad_norm": 1.1990253561088162, "kl": 0.275390625, "learning_rate": 1.6314158069267946e-08, "loss": 0.002778817666694522, "memory(GiB)": 137.41, "reward": 1.6294825077056885, "reward_std": 0.18033751845359802, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4706454277038574, "rewards/EvidenceHallucination/std": 0.4809424579143524, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 0.813646674156189, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5353533625602722, "rewards/VideoAccuracy/std": 0.4947953522205353, "step": 943, "train_speed(iter/s)": 0.070629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 700.0, "completions/mean_length": 484.3571472167969, "completions/min_length": 286.0, "entropy/max": 0.5625, "entropy/mean": 0.376953125, "entropy/min": 0.10595703125, "epoch": 0.944, "grad_norm": 1.212993572010397, "kl": 0.263671875, "learning_rate": 1.574824682641629e-08, "loss": 0.0026726480573415756, "memory(GiB)": 137.41, "reward": 1.8845123052597046, "reward_std": 0.20181572437286377, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3115893006324768, "rewards/EvidenceHallucination/std": 0.4246801733970642, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.8913052082061768, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6555277705192566, "rewards/VideoAccuracy/std": 0.45412132143974304, "step": 944, "train_speed(iter/s)": 0.070466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 829.0, "completions/mean_length": 504.19049072265625, "completions/min_length": 346.0, "entropy/max": 0.72265625, "entropy/mean": 0.408203125, "entropy/min": 0.162109375, "epoch": 0.945, "grad_norm": 0.9727982469014594, "kl": 0.251953125, "learning_rate": 1.519224698779198e-08, "loss": 0.002565240953117609, "memory(GiB)": 137.41, "reward": 1.833389163017273, "reward_std": 0.11947314441204071, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4485011398792267, "rewards/EvidenceHallucination/std": 0.44314906001091003, "rewards/Evidence_Num_Record/mean": 4.0714287757873535, "rewards/Evidence_Num_Record/std": 1.7021199464797974, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.6436887979507446, "rewards/VideoAccuracy/std": 0.5075082182884216, "step": 945, "train_speed(iter/s)": 0.070227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1908.0, "completions/mean_length": 541.3333129882812, "completions/min_length": 327.0, "entropy/max": 1.3203125, "entropy/mean": 0.46875, "entropy/min": 0.158203125, "epoch": 0.946, "grad_norm": 1.2465993695248823, "kl": 0.2451171875, "learning_rate": 1.4646164152307016e-08, "loss": 0.0025582569651305676, "memory(GiB)": 137.41, "reward": 1.7707005739212036, "reward_std": 0.14271783828735352, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4889131784439087, "rewards/EvidenceHallucination/std": 0.4265022277832031, "rewards/Evidence_Num_Record/mean": 5.214285850524902, "rewards/Evidence_Num_Record/std": 4.099587917327881, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6395845413208008, "rewards/VideoAccuracy/std": 0.4320310354232788, "step": 946, "train_speed(iter/s)": 0.069892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 470.452392578125, "completions/min_length": 350.0, "entropy/max": 0.66015625, "entropy/mean": 0.4296875, "entropy/min": 0.283203125, "epoch": 0.947, "grad_norm": 1.1568858956657802, "kl": 0.28125, "learning_rate": 1.4110003819009509e-08, "loss": 0.002838264685124159, "memory(GiB)": 137.41, "reward": 1.844517707824707, "reward_std": 0.12314598262310028, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3775913119316101, "rewards/EvidenceHallucination/std": 0.4459899663925171, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.8111447691917419, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.6689993143081665, "rewards/VideoAccuracy/std": 0.5376002788543701, "step": 947, "train_speed(iter/s)": 0.069711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1072.0, "completions/mean_length": 537.857177734375, "completions/min_length": 350.0, "entropy/max": 2.015625, "entropy/mean": 0.349609375, "entropy/min": 0.126953125, "epoch": 0.948, "grad_norm": 1.0323083487996982, "kl": 0.20703125, "learning_rate": 1.3583771387028264e-08, "loss": 0.0021275142207741737, "memory(GiB)": 137.41, "reward": 2.2996435165405273, "reward_std": 0.07079820334911346, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7686232924461365, "rewards/EvidenceHallucination/std": 0.3105987012386322, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.8913052678108215, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 1.0125852823257446, "rewards/VideoAccuracy/std": 0.1856028288602829, "step": 948, "train_speed(iter/s)": 0.069326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 765.0, "completions/mean_length": 486.4285888671875, "completions/min_length": 339.0, "entropy/max": 0.875, "entropy/mean": 0.451171875, "entropy/min": 0.234375, "epoch": 0.949, "grad_norm": 1.1457712081147309, "kl": 0.2734375, "learning_rate": 1.3067472155517734e-08, "loss": 0.0027662317734211683, "memory(GiB)": 137.41, "reward": 1.6505743265151978, "reward_std": 0.12623053789138794, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.44372376799583435, "rewards/EvidenceHallucination/std": 0.4626716375350952, "rewards/Evidence_Num_Record/mean": 4.142857074737549, "rewards/Evidence_Num_Record/std": 1.049307107925415, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.5046866536140442, "rewards/VideoAccuracy/std": 0.4409211575984955, "step": 949, "train_speed(iter/s)": 0.069144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 645.0, "completions/mean_length": 448.0476379394531, "completions/min_length": 307.0, "entropy/max": 0.67578125, "entropy/mean": 0.42578125, "entropy/min": 0.203125, "epoch": 0.95, "grad_norm": 1.2601635925600487, "kl": 0.26171875, "learning_rate": 1.2561111323605711e-08, "loss": 0.0026425044052302837, "memory(GiB)": 137.41, "reward": 1.7202696800231934, "reward_std": 0.1176731213927269, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5164002776145935, "rewards/EvidenceHallucination/std": 0.45663920044898987, "rewards/Evidence_Num_Record/mean": 3.4285714626312256, "rewards/Evidence_Num_Record/std": 0.9144598841667175, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5836562514305115, "rewards/VideoAccuracy/std": 0.46830034255981445, "step": 950, "train_speed(iter/s)": 0.068991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 847.0, "completions/mean_length": 575.5238037109375, "completions/min_length": 370.0, "entropy/max": 0.494140625, "entropy/mean": 0.267578125, "entropy/min": 0.142578125, "epoch": 0.951, "grad_norm": 0.9935481006326623, "kl": 0.20703125, "learning_rate": 1.2064693990339936e-08, "loss": 0.002088331850245595, "memory(GiB)": 137.41, "reward": 2.3192498683929443, "reward_std": 0.15067747235298157, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7569528222084045, "rewards/EvidenceHallucination/std": 0.29730042815208435, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.9236220121383667, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.9678592085838318, "rewards/VideoAccuracy/std": 0.25056150555610657, "step": 951, "train_speed(iter/s)": 0.068747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 934.0, "completions/mean_length": 482.4285888671875, "completions/min_length": 336.0, "entropy/max": 1.8671875, "entropy/mean": 0.515625, "entropy/min": 0.1796875, "epoch": 0.952, "grad_norm": 1.2671158801258262, "kl": 0.259765625, "learning_rate": 1.1578225154637578e-08, "loss": 0.002628859132528305, "memory(GiB)": 137.41, "reward": 1.8591318130493164, "reward_std": 0.14313216507434845, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5897926688194275, "rewards/EvidenceHallucination/std": 0.4144156873226166, "rewards/Evidence_Num_Record/mean": 4.642857074737549, "rewards/Evidence_Num_Record/std": 1.664865493774414, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.6840303540229797, "rewards/VideoAccuracy/std": 0.4067501425743103, "step": 952, "train_speed(iter/s)": 0.06856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 542.0, "completions/mean_length": 405.4285888671875, "completions/min_length": 276.0, "entropy/max": 0.66796875, "entropy/mean": 0.439453125, "entropy/min": 0.2890625, "epoch": 0.953, "grad_norm": 1.0800177401519737, "kl": 0.294921875, "learning_rate": 1.1101709715234386e-08, "loss": 0.0029569934122264385, "memory(GiB)": 137.41, "reward": 1.3686178922653198, "reward_std": 0.1393006443977356, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29636797308921814, "rewards/EvidenceHallucination/std": 0.433406800031662, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.8611501455307007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.30934420228004456, "rewards/VideoAccuracy/std": 0.4380314350128174, "step": 953, "train_speed(iter/s)": 0.068385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1036.0, "completions/mean_length": 550.452392578125, "completions/min_length": 320.0, "entropy/max": 0.6484375, "entropy/mean": 0.3671875, "entropy/min": 0.1640625, "epoch": 0.954, "grad_norm": 1.061834776497953, "kl": 0.2451171875, "learning_rate": 1.0635152470635511e-08, "loss": 0.002483302028849721, "memory(GiB)": 137.41, "reward": 2.0882914066314697, "reward_std": 0.13326478004455566, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6413856148719788, "rewards/EvidenceHallucination/std": 0.43948817253112793, "rewards/Evidence_Num_Record/mean": 4.047619342803955, "rewards/Evidence_Num_Record/std": 0.7948732376098633, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7933475971221924, "rewards/VideoAccuracy/std": 0.45950794219970703, "step": 954, "train_speed(iter/s)": 0.068159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 854.0, "completions/mean_length": 531.952392578125, "completions/min_length": 345.0, "entropy/max": 1.0078125, "entropy/mean": 0.388671875, "entropy/min": 0.150390625, "epoch": 0.955, "grad_norm": 1.2273595567320048, "kl": 0.2490234375, "learning_rate": 1.0178558119067315e-08, "loss": 0.0025239530950784683, "memory(GiB)": 137.41, "reward": 2.1271867752075195, "reward_std": 0.10089495033025742, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7541455030441284, "rewards/EvidenceHallucination/std": 0.25523731112480164, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.7662469148635864, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.506060779094696, "rewards/VideoAccuracy/mean": 0.8763576149940491, "rewards/VideoAccuracy/std": 0.22635163366794586, "step": 955, "train_speed(iter/s)": 0.067904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 710.0, "completions/mean_length": 479.452392578125, "completions/min_length": 313.0, "entropy/max": 0.68359375, "entropy/mean": 0.46484375, "entropy/min": 0.294921875, "epoch": 0.956, "grad_norm": 1.1081920964545056, "kl": 0.263671875, "learning_rate": 9.731931258429638e-09, "loss": 0.002650549402460456, "memory(GiB)": 137.41, "reward": 1.5818074941635132, "reward_std": 0.20420019328594208, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.42155587673187256, "rewards/EvidenceHallucination/std": 0.47306376695632935, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.21091628074646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.46416300535202026, "rewards/VideoAccuracy/std": 0.472253680229187, "step": 956, "train_speed(iter/s)": 0.067742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 698.0, "completions/mean_length": 451.7857360839844, "completions/min_length": 307.0, "entropy/max": 0.5234375, "entropy/mean": 0.408203125, "entropy/min": 0.275390625, "epoch": 0.957, "grad_norm": 1.3877350890068532, "kl": 0.283203125, "learning_rate": 9.295276386250273e-09, "loss": 0.0028288476169109344, "memory(GiB)": 137.41, "reward": 2.050288200378418, "reward_std": 0.24018505215644836, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6676385402679443, "rewards/EvidenceHallucination/std": 0.4354212284088135, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 1.007521152496338, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8167604804039001, "rewards/VideoAccuracy/std": 0.520463228225708, "step": 957, "train_speed(iter/s)": 0.067538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 867.0, "completions/mean_length": 519.6904907226562, "completions/min_length": 334.0, "entropy/max": 0.93359375, "entropy/mean": 0.408203125, "entropy/min": 0.138671875, "epoch": 0.958, "grad_norm": 0.9809102349146418, "kl": 0.2216796875, "learning_rate": 8.868597899638897e-09, "loss": 0.0022694962099194527, "memory(GiB)": 137.41, "reward": 2.078068733215332, "reward_std": 0.08545194566249847, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5540817975997925, "rewards/EvidenceHallucination/std": 0.44805270433425903, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.0704021453857422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.8434427380561829, "rewards/VideoAccuracy/std": 0.49243026971817017, "step": 958, "train_speed(iter/s)": 0.067335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/mean_length": 469.21429443359375, "completions/min_length": 283.0, "entropy/max": 0.6171875, "entropy/mean": 0.4296875, "entropy/min": 0.25, "epoch": 0.959, "grad_norm": 1.1630105588447377, "kl": 0.27734375, "learning_rate": 8.45190009524288e-09, "loss": 0.0028287163004279137, "memory(GiB)": 137.41, "reward": 1.6504467725753784, "reward_std": 0.2651764154434204, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.47433099150657654, "rewards/EvidenceHallucination/std": 0.46519747376441956, "rewards/Evidence_Num_Record/mean": 4.11904764175415, "rewards/Evidence_Num_Record/std": 1.2337208986282349, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.2857142984867096, "rewards/HonestTime/std": 0.45722994208335876, "rewards/VideoAccuracy/mean": 0.49843770265579224, "rewards/VideoAccuracy/std": 0.4565110504627228, "step": 959, "train_speed(iter/s)": 0.067155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 561.0, "completions/mean_length": 423.3333435058594, "completions/min_length": 269.0, "entropy/max": 0.6484375, "entropy/mean": 0.439453125, "entropy/min": 0.234375, "epoch": 0.96, "grad_norm": 1.2660896652549583, "kl": 0.287109375, "learning_rate": 8.045187169204658e-09, "loss": 0.0028954786248505116, "memory(GiB)": 137.41, "reward": 1.407809853553772, "reward_std": 0.25971004366874695, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.22925804555416107, "rewards/EvidenceHallucination/std": 0.39457935094833374, "rewards/Evidence_Num_Record/mean": 3.595238208770752, "rewards/Evidence_Num_Record/std": 1.0135550498962402, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.32862478494644165, "rewards/VideoAccuracy/std": 0.3720688819885254, "step": 960, "train_speed(iter/s)": 0.067007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 947.0, "completions/mean_length": 550.2142944335938, "completions/min_length": 360.0, "entropy/max": 0.486328125, "entropy/mean": 0.298828125, "entropy/min": 0.1279296875, "epoch": 0.961, "grad_norm": 0.9837898086934238, "kl": 0.22265625, "learning_rate": 7.648463217118983e-09, "loss": 0.0022573107853531837, "memory(GiB)": 137.41, "reward": 2.3989174365997314, "reward_std": 0.07762474566698074, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7259609699249268, "rewards/EvidenceHallucination/std": 0.36685147881507874, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.7904776334762573, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.9761905074119568, "rewards/HonestTime/std": 0.15430334210395813, "rewards/VideoAccuracy/mean": 1.0584871768951416, "rewards/VideoAccuracy/std": 0.36890536546707153, "step": 961, "train_speed(iter/s)": 0.066794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 695.0, "completions/mean_length": 476.6428527832031, "completions/min_length": 293.0, "entropy/max": 1.671875, "entropy/mean": 0.54296875, "entropy/min": 0.28515625, "epoch": 0.962, "grad_norm": 1.368957187885661, "kl": 0.279296875, "learning_rate": 7.261732233991513e-09, "loss": 0.00282856822013855, "memory(GiB)": 137.41, "reward": 1.9559398889541626, "reward_std": 0.14292877912521362, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6334197521209717, "rewards/EvidenceHallucination/std": 0.39311859011650085, "rewards/Evidence_Num_Record/mean": 4.452381134033203, "rewards/Evidence_Num_Record/std": 1.4517000913619995, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3095238208770752, "rewards/HonestTime/std": 0.4679011106491089, "rewards/VideoAccuracy/mean": 0.7673510909080505, "rewards/VideoAccuracy/std": 0.35943979024887085, "step": 962, "train_speed(iter/s)": 0.066592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 687.0, "completions/mean_length": 397.1428527832031, "completions/min_length": 269.0, "entropy/max": 0.5390625, "entropy/mean": 0.419921875, "entropy/min": 0.28515625, "epoch": 0.963, "grad_norm": 1.4789215880791144, "kl": 0.2734375, "learning_rate": 6.884998114198959e-09, "loss": 0.0027652564458549023, "memory(GiB)": 137.41, "reward": 1.682898759841919, "reward_std": 0.264392614364624, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5018981695175171, "rewards/EvidenceHallucination/std": 0.4526165723800659, "rewards/Evidence_Num_Record/mean": 3.142857313156128, "rewards/Evidence_Num_Record/std": 0.8715399503707886, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.5825192928314209, "rewards/VideoAccuracy/std": 0.4478265643119812, "step": 963, "train_speed(iter/s)": 0.066416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 882.0, "completions/mean_length": 517.3095092773438, "completions/min_length": 284.0, "entropy/max": 0.61328125, "entropy/mean": 0.3515625, "entropy/min": 0.1220703125, "epoch": 0.964, "grad_norm": 1.2469047557569142, "kl": 0.2431640625, "learning_rate": 6.518264651449779e-09, "loss": 0.0024622909259051085, "memory(GiB)": 137.41, "reward": 2.220378875732422, "reward_std": 0.16395387053489685, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7585100531578064, "rewards/EvidenceHallucination/std": 0.3773439824581146, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 0.9258201122283936, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.9067720770835876, "rewards/VideoAccuracy/std": 0.38802117109298706, "step": 964, "train_speed(iter/s)": 0.066269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1006.0, "completions/mean_length": 560.8333740234375, "completions/min_length": 321.0, "entropy/max": 1.125, "entropy/mean": 0.427734375, "entropy/min": 0.126953125, "epoch": 0.965, "grad_norm": 1.1405925129994456, "kl": 0.2294921875, "learning_rate": 6.161535538745877e-09, "loss": 0.0023431219160556793, "memory(GiB)": 137.41, "reward": 2.2555789947509766, "reward_std": 0.0632232129573822, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8684941530227661, "rewards/EvidenceHallucination/std": 0.11418500542640686, "rewards/Evidence_Num_Record/mean": 4.357142925262451, "rewards/Evidence_Num_Record/std": 1.4283100366592407, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 1.0152136087417603, "rewards/VideoAccuracy/std": 0.17440201342105865, "step": 965, "train_speed(iter/s)": 0.066059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 776.0, "completions/mean_length": 462.3571472167969, "completions/min_length": 327.0, "entropy/max": 1.28125, "entropy/mean": 0.50390625, "entropy/min": 0.2890625, "epoch": 0.966, "grad_norm": 1.234171819098209, "kl": 0.27734375, "learning_rate": 5.814814368345411e-09, "loss": 0.002802126109600067, "memory(GiB)": 137.41, "reward": 1.6601600646972656, "reward_std": 0.23421823978424072, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45707494020462036, "rewards/EvidenceHallucination/std": 0.4481717646121979, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.1536942720413208, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5401737689971924, "rewards/VideoAccuracy/std": 0.46416065096855164, "step": 966, "train_speed(iter/s)": 0.065872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 707.0, "completions/mean_length": 474.5238037109375, "completions/min_length": 354.0, "entropy/max": 0.5625, "entropy/mean": 0.41015625, "entropy/min": 0.259765625, "epoch": 0.967, "grad_norm": 0.9856314384361605, "kl": 0.2890625, "learning_rate": 5.47810463172671e-09, "loss": 0.00290171941742301, "memory(GiB)": 137.41, "reward": 1.616075038909912, "reward_std": 0.13448190689086914, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.292513370513916, "rewards/EvidenceHallucination/std": 0.4066556990146637, "rewards/Evidence_Num_Record/mean": 4.023809432983398, "rewards/Evidence_Num_Record/std": 0.6803189516067505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.4575723111629486, "rewards/VideoAccuracy/std": 0.5822301506996155, "step": 967, "train_speed(iter/s)": 0.065796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 862.0, "completions/mean_length": 536.8095092773438, "completions/min_length": 336.0, "entropy/max": 0.78125, "entropy/mean": 0.326171875, "entropy/min": 0.119140625, "epoch": 0.968, "grad_norm": 1.0150307188913046, "kl": 0.21875, "learning_rate": 5.151409719553079e-09, "loss": 0.0022225372958928347, "memory(GiB)": 137.41, "reward": 1.7808631658554077, "reward_std": 0.0931321382522583, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.39275211095809937, "rewards/EvidenceHallucination/std": 0.43423381447792053, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.9830148816108704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6428571343421936, "rewards/HonestTime/std": 0.48496562242507935, "rewards/VideoAccuracy/mean": 0.5737413167953491, "rewards/VideoAccuracy/std": 0.42062005400657654, "step": 968, "train_speed(iter/s)": 0.06562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 711.0, "completions/mean_length": 480.40478515625, "completions/min_length": 261.0, "entropy/max": 0.9765625, "entropy/mean": 0.416015625, "entropy/min": 0.1611328125, "epoch": 0.969, "grad_norm": 1.1888175493601036, "kl": 0.275390625, "learning_rate": 4.834732921638718e-09, "loss": 0.0027951847296208143, "memory(GiB)": 137.41, "reward": 1.6992765665054321, "reward_std": 0.12343087792396545, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5417040586471558, "rewards/EvidenceHallucination/std": 0.46338191628456116, "rewards/Evidence_Num_Record/mean": 4.285714149475098, "rewards/Evidence_Num_Record/std": 1.1952285766601562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.5623642802238464, "rewards/VideoAccuracy/std": 0.46270543336868286, "step": 969, "train_speed(iter/s)": 0.065423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 846.0, "completions/mean_length": 458.5, "completions/min_length": 319.0, "entropy/max": 0.8125, "entropy/mean": 0.470703125, "entropy/min": 0.23046875, "epoch": 0.97, "grad_norm": 1.4858103128918467, "kl": 0.279296875, "learning_rate": 4.528077426915411e-09, "loss": 0.0028038870077580214, "memory(GiB)": 137.41, "reward": 1.5698540210723877, "reward_std": 0.3884607255458832, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3760813772678375, "rewards/EvidenceHallucination/std": 0.4657697379589081, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.157964825630188, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.4660661518573761, "rewards/VideoAccuracy/std": 0.5560010671615601, "step": 970, "train_speed(iter/s)": 0.06524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1025.0, "completions/mean_length": 563.1428833007812, "completions/min_length": 324.0, "entropy/max": 0.484375, "entropy/mean": 0.255859375, "entropy/min": 0.12255859375, "epoch": 0.971, "grad_norm": 1.0478267118673452, "kl": 0.22265625, "learning_rate": 4.231446323400556e-09, "loss": 0.002258533611893654, "memory(GiB)": 137.41, "reward": 2.412954807281494, "reward_std": 0.12907324731349945, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.66871577501297, "rewards/EvidenceHallucination/std": 0.30906942486763, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8502919673919678, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.079211711883545, "rewards/VideoAccuracy/std": 0.3644346594810486, "step": 971, "train_speed(iter/s)": 0.065052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 670.0, "completions/mean_length": 481.23809814453125, "completions/min_length": 336.0, "entropy/max": 1.015625, "entropy/mean": 0.484375, "entropy/min": 0.283203125, "epoch": 0.972, "grad_norm": 1.1357201236936143, "kl": 0.271484375, "learning_rate": 3.944842598166187e-09, "loss": 0.002750544808804989, "memory(GiB)": 137.41, "reward": 1.3433635234832764, "reward_std": 0.11884146928787231, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.21864919364452362, "rewards/EvidenceHallucination/std": 0.37373003363609314, "rewards/Evidence_Num_Record/mean": 4.5714287757873535, "rewards/Evidence_Num_Record/std": 1.3460485935211182, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.2710622549057007, "rewards/VideoAccuracy/std": 0.4434342086315155, "step": 972, "train_speed(iter/s)": 0.064901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/mean_length": 445.2857360839844, "completions/min_length": 198.0, "entropy/max": 0.66015625, "entropy/mean": 0.443359375, "entropy/min": 0.2236328125, "epoch": 0.973, "grad_norm": 1.1542425201520503, "kl": 0.265625, "learning_rate": 3.6682691373086662e-09, "loss": 0.0026901671662926674, "memory(GiB)": 137.41, "reward": 1.4638851881027222, "reward_std": 0.21122592687606812, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3182029128074646, "rewards/EvidenceHallucination/std": 0.4255477786064148, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 1.3110802173614502, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4002445936203003, "rewards/VideoAccuracy/std": 0.4778297245502472, "step": 973, "train_speed(iter/s)": 0.064688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 694.0, "completions/mean_length": 448.90478515625, "completions/min_length": 329.0, "entropy/max": 0.5625, "entropy/mean": 0.34375, "entropy/min": 0.16015625, "epoch": 0.974, "grad_norm": 1.2473136745873308, "kl": 0.255859375, "learning_rate": 3.4017287259193728e-09, "loss": 0.002587102120742202, "memory(GiB)": 137.41, "reward": 2.5488216876983643, "reward_std": 0.11489617824554443, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7000425457954407, "rewards/EvidenceHallucination/std": 0.42865434288978577, "rewards/Evidence_Num_Record/mean": 3.357142925262451, "rewards/Evidence_Num_Record/std": 0.5328903794288635, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 1.2469083070755005, "rewards/VideoAccuracy/std": 0.5960439443588257, "step": 974, "train_speed(iter/s)": 0.064561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 766.0, "completions/mean_length": 486.26190185546875, "completions/min_length": 288.0, "entropy/max": 1.4140625, "entropy/mean": 0.443359375, "entropy/min": 0.1298828125, "epoch": 0.975, "grad_norm": 1.3893919596098658, "kl": 0.2392578125, "learning_rate": 3.1452240480577262e-09, "loss": 0.002424593549221754, "memory(GiB)": 137.41, "reward": 2.1735761165618896, "reward_std": 0.06262548267841339, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7403359413146973, "rewards/EvidenceHallucination/std": 0.3639848828315735, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 1.092950701713562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9588421583175659, "rewards/VideoAccuracy/std": 0.11992564797401428, "step": 975, "train_speed(iter/s)": 0.064283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 604.0, "completions/mean_length": 461.66668701171875, "completions/min_length": 280.0, "entropy/max": 0.88671875, "entropy/mean": 0.44140625, "entropy/min": 0.271484375, "epoch": 0.976, "grad_norm": 1.0344104124820364, "kl": 0.28125, "learning_rate": 2.8987576867225415e-09, "loss": 0.002832625526934862, "memory(GiB)": 137.41, "reward": 1.4347890615463257, "reward_std": 0.2360856980085373, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.33469676971435547, "rewards/EvidenceHallucination/std": 0.4358334541320801, "rewards/Evidence_Num_Record/mean": 3.904762029647827, "rewards/Evidence_Num_Record/std": 0.9830148816108704, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.3440402150154114, "rewards/VideoAccuracy/std": 0.43539971113204956, "step": 976, "train_speed(iter/s)": 0.064168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 588.0, "completions/mean_length": 439.9761962890625, "completions/min_length": 274.0, "entropy/max": 0.59375, "entropy/mean": 0.42578125, "entropy/min": 0.224609375, "epoch": 0.977, "grad_norm": 1.479544814511891, "kl": 0.2890625, "learning_rate": 2.662332123827715e-09, "loss": 0.002926081418991089, "memory(GiB)": 137.41, "reward": 2.1106927394866943, "reward_std": 0.24937795102596283, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6478333473205566, "rewards/EvidenceHallucination/std": 0.38507360219955444, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8502919673919678, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.8811261653900146, "rewards/VideoAccuracy/std": 0.5433034300804138, "step": 977, "train_speed(iter/s)": 0.064028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1154.0, "completions/mean_length": 550.6904907226562, "completions/min_length": 327.0, "entropy/max": 1.53125, "entropy/mean": 0.3828125, "entropy/min": 0.1259765625, "epoch": 0.978, "grad_norm": 0.8558925233546997, "kl": 0.2138671875, "learning_rate": 2.435949740175802e-09, "loss": 0.002151726046577096, "memory(GiB)": 137.41, "reward": 1.85798978805542, "reward_std": 0.1420179307460785, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.36319100856781006, "rewards/EvidenceHallucination/std": 0.4514017403125763, "rewards/Evidence_Num_Record/mean": 3.857142925262451, "rewards/Evidence_Num_Record/std": 1.0257996320724487, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.652018129825592, "rewards/VideoAccuracy/std": 0.4988376200199127, "step": 978, "train_speed(iter/s)": 0.06383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 543.0, "completions/mean_length": 423.5238037109375, "completions/min_length": 347.0, "entropy/max": 1.1171875, "entropy/mean": 0.4765625, "entropy/min": 0.353515625, "epoch": 0.979, "grad_norm": 1.3022093115598243, "kl": 0.287109375, "learning_rate": 2.2196128154349235e-09, "loss": 0.0028963349759578705, "memory(GiB)": 137.41, "reward": 1.6460776329040527, "reward_std": 0.1852813959121704, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5184386372566223, "rewards/EvidenceHallucination/std": 0.46547335386276245, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.7261499166488647, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.509056568145752, "rewards/VideoAccuracy/std": 0.46530571579933167, "step": 979, "train_speed(iter/s)": 0.063673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/mean_length": 504.16668701171875, "completions/min_length": 260.0, "entropy/max": 0.66015625, "entropy/mean": 0.388671875, "entropy/min": 0.232421875, "epoch": 0.98, "grad_norm": 1.3549656682464233, "kl": 0.267578125, "learning_rate": 2.0133235281156735e-09, "loss": 0.0026948326267302036, "memory(GiB)": 137.41, "reward": 1.8087973594665527, "reward_std": 0.4004395008087158, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5858333110809326, "rewards/EvidenceHallucination/std": 0.44859325885772705, "rewards/Evidence_Num_Record/mean": 4.595238208770752, "rewards/Evidence_Num_Record/std": 2.0489084720611572, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.6582973599433899, "rewards/VideoAccuracy/std": 0.4996359050273895, "step": 980, "train_speed(iter/s)": 0.063485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/mean_length": 550.7619018554688, "completions/min_length": 344.0, "entropy/max": 0.6484375, "entropy/mean": 0.2890625, "entropy/min": 0.1328125, "epoch": 0.981, "grad_norm": 1.019291494838437, "kl": 0.2216796875, "learning_rate": 1.8170839555486927e-09, "loss": 0.0022596633061766624, "memory(GiB)": 137.41, "reward": 2.449709892272949, "reward_std": 0.13686299324035645, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8575530648231506, "rewards/EvidenceHallucination/std": 0.17645077407360077, "rewards/Evidence_Num_Record/mean": 3.7857143878936768, "rewards/Evidence_Num_Record/std": 0.5646373629570007, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.0781992673873901, "rewards/VideoAccuracy/std": 0.2899397015571594, "step": 981, "train_speed(iter/s)": 0.063216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/mean_length": 483.5476379394531, "completions/min_length": 357.0, "entropy/max": 3.0625, "entropy/mean": 0.58984375, "entropy/min": 0.267578125, "epoch": 0.982, "grad_norm": 1.3356176737464052, "kl": 0.279296875, "learning_rate": 1.6308960738643518e-09, "loss": 0.0028589987196028233, "memory(GiB)": 137.41, "reward": 1.6344343423843384, "reward_std": 0.2198060005903244, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.45886075496673584, "rewards/EvidenceHallucination/std": 0.4734567403793335, "rewards/Evidence_Num_Record/mean": 4.190476417541504, "rewards/Evidence_Num_Record/std": 2.1779167652130127, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5093289613723755, "rewards/VideoAccuracy/std": 0.47165292501449585, "step": 982, "train_speed(iter/s)": 0.063085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 857.0, "completions/mean_length": 492.5238037109375, "completions/min_length": 323.0, "entropy/max": 0.828125, "entropy/mean": 0.46484375, "entropy/min": 0.30078125, "epoch": 0.983, "grad_norm": 1.243158746708228, "kl": 0.259765625, "learning_rate": 1.4547617579725446e-09, "loss": 0.0026392736472189426, "memory(GiB)": 137.41, "reward": 1.5035511255264282, "reward_std": 0.30378079414367676, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3797284960746765, "rewards/EvidenceHallucination/std": 0.46767836809158325, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.6088099479675293, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.42760542035102844, "rewards/VideoAccuracy/std": 0.4682418406009674, "step": 983, "train_speed(iter/s)": 0.062895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 766.0, "completions/mean_length": 468.71429443359375, "completions/min_length": 350.0, "entropy/max": 0.5546875, "entropy/mean": 0.35546875, "entropy/min": 0.158203125, "epoch": 0.984, "grad_norm": 1.3097752195029715, "kl": 0.263671875, "learning_rate": 1.2886827815440372e-09, "loss": 0.002676494186744094, "memory(GiB)": 137.41, "reward": 2.417630672454834, "reward_std": 0.18030908703804016, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.756335973739624, "rewards/EvidenceHallucination/std": 0.3809882700443268, "rewards/Evidence_Num_Record/mean": 3.4761905670166016, "rewards/Evidence_Num_Record/std": 0.6339229941368103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8333333730697632, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 1.0996966361999512, "rewards/VideoAccuracy/std": 0.4313112795352936, "step": 984, "train_speed(iter/s)": 0.062762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 749.0, "completions/mean_length": 499.452392578125, "completions/min_length": 310.0, "entropy/max": 1.6484375, "entropy/mean": 0.4765625, "entropy/min": 0.134765625, "epoch": 0.985, "grad_norm": 1.2582956870984567, "kl": 0.22265625, "learning_rate": 1.1326608169920371e-09, "loss": 0.002244186121970415, "memory(GiB)": 137.41, "reward": 2.0736777782440186, "reward_std": 0.18576598167419434, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.649118959903717, "rewards/EvidenceHallucination/std": 0.3512740433216095, "rewards/Evidence_Num_Record/mean": 4.38095235824585, "rewards/Evidence_Num_Record/std": 1.8206455707550049, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.877187192440033, "rewards/VideoAccuracy/std": 0.3439772427082062, "step": 985, "train_speed(iter/s)": 0.062612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 602.0, "completions/mean_length": 458.6190490722656, "completions/min_length": 299.0, "entropy/max": 0.76953125, "entropy/mean": 0.4765625, "entropy/min": 0.306640625, "epoch": 0.986, "grad_norm": 1.4153108898347777, "kl": 0.26171875, "learning_rate": 9.866974354560964e-10, "loss": 0.0026454180479049683, "memory(GiB)": 137.41, "reward": 1.6579073667526245, "reward_std": 0.4413139820098877, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.4636135995388031, "rewards/EvidenceHallucination/std": 0.4519502520561218, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9997095465660095, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1190476194024086, "rewards/HonestTime/std": 0.32777008414268494, "rewards/VideoAccuracy/mean": 0.5413752794265747, "rewards/VideoAccuracy/std": 0.4650043547153473, "step": 986, "train_speed(iter/s)": 0.062456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 581.0, "completions/mean_length": 461.9285888671875, "completions/min_length": 313.0, "entropy/max": 0.56640625, "entropy/mean": 0.4140625, "entropy/min": 0.28515625, "epoch": 0.987, "grad_norm": 1.4294908511870958, "kl": 0.291015625, "learning_rate": 8.507941067859015e-10, "loss": 0.0029428645502775908, "memory(GiB)": 137.41, "reward": 1.8523160219192505, "reward_std": 0.13490135967731476, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5050612688064575, "rewards/EvidenceHallucination/std": 0.4884006679058075, "rewards/Evidence_Num_Record/mean": 3.761904716491699, "rewards/Evidence_Num_Record/std": 0.6917465925216675, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.5, "rewards/HonestTime/std": 0.5060608386993408, "rewards/VideoAccuracy/mean": 0.651303768157959, "rewards/VideoAccuracy/std": 0.5998431444168091, "step": 987, "train_speed(iter/s)": 0.062331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 789.0, "completions/mean_length": 547.4285888671875, "completions/min_length": 367.0, "entropy/max": 0.91796875, "entropy/mean": 0.3203125, "entropy/min": 0.15625, "epoch": 0.988, "grad_norm": 0.8890710600096047, "kl": 0.2158203125, "learning_rate": 7.249521995263963e-10, "loss": 0.0021866951137781143, "memory(GiB)": 137.41, "reward": 1.7507095336914062, "reward_std": 0.07063025236129761, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3379446864128113, "rewards/EvidenceHallucination/std": 0.42451003193855286, "rewards/Evidence_Num_Record/mean": 3.642857313156128, "rewards/Evidence_Num_Record/std": 0.8211066126823425, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6666666865348816, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.5497872829437256, "rewards/VideoAccuracy/std": 0.39387640357017517, "step": 988, "train_speed(iter/s)": 0.062227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 756.0, "completions/mean_length": 477.0476379394531, "completions/min_length": 338.0, "entropy/max": 0.7578125, "entropy/mean": 0.455078125, "entropy/min": 0.306640625, "epoch": 0.989, "grad_norm": 1.1880257355559192, "kl": 0.275390625, "learning_rate": 6.091729809042379e-10, "loss": 0.0027825646102428436, "memory(GiB)": 137.41, "reward": 1.5465867519378662, "reward_std": 0.11989744752645493, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.389413446187973, "rewards/EvidenceHallucination/std": 0.46374890208244324, "rewards/Evidence_Num_Record/mean": 4.404761791229248, "rewards/Evidence_Num_Record/std": 1.0135550498962402, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.43537071347236633, "rewards/VideoAccuracy/std": 0.4611511826515198, "step": 989, "train_speed(iter/s)": 0.062084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 705.0, "completions/mean_length": 466.4285888671875, "completions/min_length": 337.0, "entropy/max": 0.65625, "entropy/mean": 0.443359375, "entropy/min": 0.296875, "epoch": 0.99, "grad_norm": 1.16863477938996, "kl": 0.283203125, "learning_rate": 5.034576168149174e-10, "loss": 0.002865845337510109, "memory(GiB)": 137.41, "reward": 1.6263631582260132, "reward_std": 0.3727450370788574, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3417263925075531, "rewards/EvidenceHallucination/std": 0.42671677470207214, "rewards/Evidence_Num_Record/mean": 4.261904716491699, "rewards/Evidence_Num_Record/std": 1.3262733221054077, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.5246845483779907, "rewards/VideoAccuracy/std": 0.5592222213745117, "step": 990, "train_speed(iter/s)": 0.06194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 774.0, "completions/mean_length": 513.5952758789062, "completions/min_length": 353.0, "entropy/max": 0.470703125, "entropy/mean": 0.25390625, "entropy/min": 0.115234375, "epoch": 0.991, "grad_norm": 1.1156864175554875, "kl": 0.2265625, "learning_rate": 4.078071718107701e-10, "loss": 0.002295741345733404, "memory(GiB)": 137.41, "reward": 2.361910820007324, "reward_std": 0.0892949029803276, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6723664999008179, "rewards/EvidenceHallucination/std": 0.4161950349807739, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.594203531742096, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 1.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 1.027437448501587, "rewards/VideoAccuracy/std": 0.38673079013824463, "step": 991, "train_speed(iter/s)": 0.06179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 767.0, "completions/mean_length": 482.26190185546875, "completions/min_length": 282.0, "entropy/max": 0.9140625, "entropy/mean": 0.4609375, "entropy/min": 0.267578125, "epoch": 0.992, "grad_norm": 1.443864793433425, "kl": 0.255859375, "learning_rate": 3.2222260909087194e-10, "loss": 0.002598737133666873, "memory(GiB)": 137.41, "reward": 1.8899970054626465, "reward_std": 0.306724488735199, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6235135793685913, "rewards/EvidenceHallucination/std": 0.40866008400917053, "rewards/Evidence_Num_Record/mean": 4.61904764175415, "rewards/Evidence_Num_Record/std": 1.4971905946731567, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1666666716337204, "rewards/HonestTime/std": 0.37719547748565674, "rewards/VideoAccuracy/mean": 0.7319609522819519, "rewards/VideoAccuracy/std": 0.4215676784515381, "step": 992, "train_speed(iter/s)": 0.061501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 814.0, "completions/mean_length": 460.9285888671875, "completions/min_length": 304.0, "entropy/max": 0.83984375, "entropy/mean": 0.47265625, "entropy/min": 0.265625, "epoch": 0.993, "grad_norm": 1.2905090614888646, "kl": 0.251953125, "learning_rate": 2.4670479049082594e-10, "loss": 0.002519114874303341, "memory(GiB)": 137.41, "reward": 1.3910771608352661, "reward_std": 0.3596101999282837, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.29593271017074585, "rewards/EvidenceHallucination/std": 0.43152523040771484, "rewards/Evidence_Num_Record/mean": 3.9761905670166016, "rewards/Evidence_Num_Record/std": 0.9236221313476562, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.33189061284065247, "rewards/VideoAccuracy/std": 0.47508248686790466, "step": 993, "train_speed(iter/s)": 0.061359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 657.0, "completions/mean_length": 453.8333435058594, "completions/min_length": 339.0, "entropy/max": 0.6171875, "entropy/mean": 0.373046875, "entropy/min": 0.1611328125, "epoch": 0.994, "grad_norm": 1.1471652869518276, "kl": 0.267578125, "learning_rate": 1.81254476474213e-10, "loss": 0.0026834774762392044, "memory(GiB)": 137.41, "reward": 2.1440560817718506, "reward_std": 0.13914991915225983, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.6713060736656189, "rewards/EvidenceHallucination/std": 0.3882835805416107, "rewards/Evidence_Num_Record/mean": 3.5238096714019775, "rewards/Evidence_Num_Record/std": 0.8333913683891296, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.8095238208770752, "rewards/HonestTime/std": 0.39743661880493164, "rewards/VideoAccuracy/mean": 0.8478900790214539, "rewards/VideoAccuracy/std": 0.42282700538635254, "step": 994, "train_speed(iter/s)": 0.061239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 839.0, "completions/mean_length": 507.3809509277344, "completions/min_length": 355.0, "entropy/max": 0.85546875, "entropy/mean": 0.41796875, "entropy/min": 0.1572265625, "epoch": 0.995, "grad_norm": 1.2029400801179002, "kl": 0.2451171875, "learning_rate": 1.258723261249317e-10, "loss": 0.0024793946649879217, "memory(GiB)": 137.41, "reward": 2.1752610206604004, "reward_std": 0.13766251504421234, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.8124963641166687, "rewards/EvidenceHallucination/std": 0.31519562005996704, "rewards/Evidence_Num_Record/mean": 3.7142858505249023, "rewards/Evidence_Num_Record/std": 0.7419721484184265, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.3333333432674408, "rewards/HonestTime/std": 0.47711876034736633, "rewards/VideoAccuracy/mean": 0.9460952281951904, "rewards/VideoAccuracy/std": 0.3199745714664459, "step": 995, "train_speed(iter/s)": 0.061039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 736.0, "completions/mean_length": 471.26190185546875, "completions/min_length": 349.0, "entropy/max": 0.65625, "entropy/mean": 0.4453125, "entropy/min": 0.275390625, "epoch": 0.996, "grad_norm": 0.9239728844892342, "kl": 0.259765625, "learning_rate": 8.055889714064789e-11, "loss": 0.0026135165244340897, "memory(GiB)": 137.41, "reward": 1.479562759399414, "reward_std": 0.18091247975826263, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.3740043044090271, "rewards/EvidenceHallucination/std": 0.4622194170951843, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.1243788003921509, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.4047619104385376, "rewards/VideoAccuracy/std": 0.49679574370384216, "step": 996, "train_speed(iter/s)": 0.060871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 665.0, "completions/mean_length": 452.0238037109375, "completions/min_length": 324.0, "entropy/max": 0.57421875, "entropy/mean": 0.43359375, "entropy/min": 0.29296875, "epoch": 0.997, "grad_norm": 1.3533168229363475, "kl": 0.28125, "learning_rate": 4.5314645827132516e-11, "loss": 0.0028381943702697754, "memory(GiB)": 137.41, "reward": 2.030975103378296, "reward_std": 0.26551011204719543, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5573198795318604, "rewards/EvidenceHallucination/std": 0.44968104362487793, "rewards/Evidence_Num_Record/mean": 3.547619104385376, "rewards/Evidence_Num_Record/std": 0.8323455452919006, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.4761904776096344, "rewards/HonestTime/std": 0.5054867267608643, "rewards/VideoAccuracy/mean": 0.8242730498313904, "rewards/VideoAccuracy/std": 0.5885381698608398, "step": 997, "train_speed(iter/s)": 0.060753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 842.0, "completions/mean_length": 598.6428833007812, "completions/min_length": 395.0, "entropy/max": 0.97265625, "entropy/mean": 0.3359375, "entropy/min": 0.1318359375, "epoch": 0.998, "grad_norm": 1.034021068336648, "kl": 0.20703125, "learning_rate": 2.0139927093487663e-11, "loss": 0.002108077285811305, "memory(GiB)": 137.41, "reward": 2.166386127471924, "reward_std": 0.21136659383773804, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.7145857214927673, "rewards/EvidenceHallucination/std": 0.3231663107872009, "rewards/Evidence_Num_Record/mean": 4.309524059295654, "rewards/Evidence_Num_Record/std": 1.0704021453857422, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.6190476417541504, "rewards/HonestTime/std": 0.4915074408054352, "rewards/VideoAccuracy/mean": 0.8996596932411194, "rewards/VideoAccuracy/std": 0.3587428331375122, "step": 998, "train_speed(iter/s)": 0.060568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 812.0, "completions/mean_length": 470.5476379394531, "completions/min_length": 328.0, "entropy/max": 0.67578125, "entropy/mean": 0.41796875, "entropy/min": 0.251953125, "epoch": 0.999, "grad_norm": 1.3317680302128188, "kl": 0.271484375, "learning_rate": 5.034994448926966e-12, "loss": 0.002765919780358672, "memory(GiB)": 137.41, "reward": 1.734266757965088, "reward_std": 0.38671621680259705, "rewards/EvidenceFormat/mean": 1.0, "rewards/EvidenceFormat/std": 0.0, "rewards/EvidenceHallucination/mean": 0.5760961174964905, "rewards/EvidenceHallucination/std": 0.4836040437221527, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 1.528855323791504, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.0, "rewards/HonestTime/std": 0.0, "rewards/VideoAccuracy/mean": 0.6190476417541504, "rewards/VideoAccuracy/std": 0.4915074408054352, "step": 999, "train_speed(iter/s)": 0.060436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1036.0, "completions/mean_length": 504.5238037109375, "completions/min_length": 274.0, "entropy/max": 0.80859375, "entropy/mean": 0.4609375, "entropy/min": 0.251953125, "epoch": 1.0, "grad_norm": 1.28633537826959, "kl": 0.2421875, "learning_rate": 0.0, "loss": 0.0024761264212429523, "memory(GiB)": 137.41, "reward": 1.4781855344772339, "reward_std": 0.22372941672801971, "rewards/EvidenceFormat/mean": 0.9761905074119568, "rewards/EvidenceFormat/std": 0.15430335700511932, "rewards/EvidenceHallucination/mean": 0.3453802764415741, "rewards/EvidenceHallucination/std": 0.4514380693435669, "rewards/Evidence_Num_Record/mean": 4.166666507720947, "rewards/Evidence_Num_Record/std": 2.8190698623657227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "rewards/HonestTime/mean": 0.1428571492433548, "rewards/HonestTime/std": 0.3541688024997711, "rewards/VideoAccuracy/mean": 0.3924427628517151, "rewards/VideoAccuracy/std": 0.49488815665245056, "step": 1000, "train_speed(iter/s)": 0.06029 } ], "logging_steps": 1, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 7, "trial_name": null, "trial_params": null }