diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100000, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/mean_length": 538.2380981445312, + "completions/min_length": 296.0, + "entropy/max": 0.94140625, + "entropy/mean": 0.515625, + "entropy/min": 0.2197265625, + "epoch": 0.001, + "grad_norm": 1.4225373801095518, + "kl": 0.0, + "learning_rate": 2e-07, + "loss": 2.4977188672892225e-07, + "memory(GiB)": 122.13, + "reward": 1.0490549802780151, + "reward_std": 0.27390730381011963, + "rewards/EvidenceFormat/mean": 0.6547619104385376, + "rewards/EvidenceFormat/std": 0.23395057022571564, + "rewards/EvidenceHallucination/mean": 0.03694198280572891, + "rewards/EvidenceHallucination/std": 0.13737311959266663, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 1.4362164735794067, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2142857164144516, + "rewards/VideoAccuracy/std": 0.41529974341392517, + "step": 1, + "train_speed(iter/s)": 0.007082 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 572.7857055664062, + "completions/min_length": 316.0, + "entropy/max": 1.3671875, + "entropy/mean": 0.57421875, + "entropy/min": 0.22265625, + "epoch": 0.002, + "grad_norm": 1.43247077891897, + "kl": 0.0, + "learning_rate": 4e-07, + "loss": 2.7531672230907134e-07, + "memory(GiB)": 144.56, + "reward": 1.0515960454940796, + "reward_std": 0.3841308057308197, + "rewards/EvidenceFormat/mean": 0.6785714626312256, + "rewards/EvidenceFormat/std": 0.3088418245315552, + "rewards/EvidenceHallucination/mean": 0.04964689537882805, + "rewards/EvidenceHallucination/std": 0.12500926852226257, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 5.710444450378418, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2142857164144516, + "rewards/VideoAccuracy/std": 0.41529974341392517, + "step": 2, + "train_speed(iter/s)": 0.009314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1034.0, + "completions/mean_length": 503.4761962890625, + "completions/min_length": 181.0, + "entropy/max": 1.0390625, + "entropy/mean": 0.447265625, + "entropy/min": 0.111328125, + "epoch": 0.003, + "grad_norm": 1.2979495807156205, + "kl": 0.00079345703125, + "learning_rate": 6e-07, + "loss": 8.431219612248242e-06, + "memory(GiB)": 144.91, + "reward": 1.3539303541183472, + "reward_std": 0.45905885100364685, + "rewards/EvidenceFormat/mean": 0.7976190447807312, + "rewards/EvidenceFormat/std": 0.27183935046195984, + "rewards/EvidenceHallucination/mean": 0.09251835197210312, + "rewards/EvidenceHallucination/std": 0.19499725103378296, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 1.6952828168869019, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4366172254085541, + "rewards/VideoAccuracy/std": 0.48768168687820435, + "step": 3, + "train_speed(iter/s)": 0.011757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/mean_length": 462.3333435058594, + "completions/min_length": 281.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.359375, + "entropy/min": 0.2177734375, + "epoch": 0.004, + "grad_norm": 1.3667239353418728, + "kl": 0.000659942626953125, + "learning_rate": 8e-07, + "loss": 6.982259037613403e-06, + "memory(GiB)": 144.91, + "reward": 1.2883940935134888, + "reward_std": 0.2476130723953247, + "rewards/EvidenceFormat/mean": 0.761904776096344, + "rewards/EvidenceFormat/std": 0.33564817905426025, + "rewards/EvidenceHallucination/mean": 0.0918847993016243, + "rewards/EvidenceHallucination/std": 0.17514179646968842, + "rewards/Evidence_Num_Record/mean": 2.6190476417541504, + "rewards/Evidence_Num_Record/std": 1.2869397401809692, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.523809552192688, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.28430283069610596, + "rewards/VideoAccuracy/std": 0.4535316228866577, + "step": 4, + "train_speed(iter/s)": 0.014002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 665.6666870117188, + "completions/min_length": 343.0, + "entropy/max": 1.609375, + "entropy/mean": 0.54296875, + "entropy/min": 0.1865234375, + "epoch": 0.005, + "grad_norm": 1.245853661293062, + "kl": 0.000640869140625, + "learning_rate": 1e-06, + "loss": 7.0298005994118284e-06, + "memory(GiB)": 144.91, + "reward": 0.9924663305282593, + "reward_std": 0.2152191400527954, + "rewards/EvidenceFormat/mean": 0.75, + "rewards/EvidenceFormat/std": 0.27607882022857666, + "rewards/EvidenceHallucination/mean": 0.013307266868650913, + "rewards/EvidenceHallucination/std": 0.06059669703245163, + "rewards/Evidence_Num_Record/mean": 4.5, + "rewards/Evidence_Num_Record/std": 4.289806842803955, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500061869621277, + "rewards/VideoAccuracy/mean": 0.07432877272367477, + "rewards/VideoAccuracy/std": 0.1967284381389618, + "step": 5, + "train_speed(iter/s)": 0.013814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 587.0, + "completions/mean_length": 414.7857360839844, + "completions/min_length": 125.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.384765625, + "entropy/min": 0.2041015625, + "epoch": 0.006, + "grad_norm": 1.7852011276259492, + "kl": 0.000865936279296875, + "learning_rate": 1.2e-06, + "loss": 9.149313882517163e-06, + "memory(GiB)": 144.91, + "reward": 1.2794909477233887, + "reward_std": 0.29543742537498474, + "rewards/EvidenceFormat/mean": 0.5833333730697632, + "rewards/EvidenceFormat/std": 0.3477570414543152, + "rewards/EvidenceHallucination/mean": 0.05816828832030296, + "rewards/EvidenceHallucination/std": 0.1828164905309677, + "rewards/Evidence_Num_Record/mean": 2.0238096714019775, + "rewards/Evidence_Num_Record/std": 1.092950701713562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4761904776096344, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 6, + "train_speed(iter/s)": 0.014329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 741.0, + "completions/mean_length": 420.3095397949219, + "completions/min_length": 265.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.361328125, + "entropy/min": 0.166015625, + "epoch": 0.007, + "grad_norm": 1.4338167561294133, + "kl": 0.00122833251953125, + "learning_rate": 1.4e-06, + "loss": 1.2808613064407837e-05, + "memory(GiB)": 144.91, + "reward": 1.4019895792007446, + "reward_std": 0.419267475605011, + "rewards/EvidenceFormat/mean": 0.738095223903656, + "rewards/EvidenceFormat/std": 0.3863224685192108, + "rewards/EvidenceHallucination/mean": 0.143430694937706, + "rewards/EvidenceHallucination/std": 0.22957825660705566, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 1.4109246730804443, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2142857164144516, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.4613986015319824, + "rewards/VideoAccuracy/std": 0.44075724482536316, + "step": 7, + "train_speed(iter/s)": 0.015176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 571.1666870117188, + "completions/min_length": 261.0, + "entropy/max": 0.6953125, + "entropy/mean": 0.353515625, + "entropy/min": 0.1552734375, + "epoch": 0.008, + "grad_norm": 1.1848593304684465, + "kl": 0.00173187255859375, + "learning_rate": 1.6e-06, + "loss": 1.9516264728736132e-05, + "memory(GiB)": 144.91, + "reward": 1.365936517715454, + "reward_std": 0.3504869341850281, + "rewards/EvidenceFormat/mean": 0.8809524178504944, + "rewards/EvidenceFormat/std": 0.2661725878715515, + "rewards/EvidenceHallucination/mean": 0.0939042791724205, + "rewards/EvidenceHallucination/std": 0.21233110129833221, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 3.964124917984009, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.5952380895614624, + "rewards/HonestTime/std": 0.49679574370384216, + "rewards/VideoAccuracy/mean": 0.2995365560054779, + "rewards/VideoAccuracy/std": 0.3632737100124359, + "step": 8, + "train_speed(iter/s)": 0.015084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1694.0, + "completions/mean_length": 485.9285888671875, + "completions/min_length": 229.0, + "entropy/max": 1.3984375, + "entropy/mean": 0.453125, + "entropy/min": 0.1259765625, + "epoch": 0.009, + "grad_norm": 1.5868900531869705, + "kl": 0.0174560546875, + "learning_rate": 1.8e-06, + "loss": 0.00018728773284237832, + "memory(GiB)": 144.91, + "reward": 1.1515543460845947, + "reward_std": 0.39280664920806885, + "rewards/EvidenceFormat/mean": 0.8452380895614624, + "rewards/EvidenceFormat/std": 0.25870442390441895, + "rewards/EvidenceHallucination/mean": 0.07324790209531784, + "rewards/EvidenceHallucination/std": 0.23180602490901947, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 3.1018333435058594, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2142857164144516, + "rewards/VideoAccuracy/std": 0.4152997136116028, + "step": 9, + "train_speed(iter/s)": 0.015322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 474.0476379394531, + "completions/min_length": 254.0, + "entropy/max": 0.7109375, + "entropy/mean": 0.412109375, + "entropy/min": 0.1103515625, + "epoch": 0.01, + "grad_norm": 1.3485732635442316, + "kl": 0.004730224609375, + "learning_rate": 2e-06, + "loss": 4.4209620682522655e-05, + "memory(GiB)": 144.91, + "reward": 1.1982691287994385, + "reward_std": 0.35777562856674194, + "rewards/EvidenceFormat/mean": 0.9285714626312256, + "rewards/EvidenceFormat/std": 0.26066118478775024, + "rewards/EvidenceHallucination/mean": 0.06569261848926544, + "rewards/EvidenceHallucination/std": 0.20155726373195648, + "rewards/Evidence_Num_Record/mean": 2.6190476417541504, + "rewards/Evidence_Num_Record/std": 1.513393521308899, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.23274962604045868, + "rewards/VideoAccuracy/std": 0.3488112688064575, + "step": 10, + "train_speed(iter/s)": 0.015311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1539.0, + "completions/mean_length": 598.3809814453125, + "completions/min_length": 327.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.326171875, + "entropy/min": 0.150390625, + "epoch": 0.011, + "grad_norm": 1.2083434940567575, + "kl": 0.00131988525390625, + "learning_rate": 1.9999949650055508e-06, + "loss": 1.3374148693401366e-05, + "memory(GiB)": 144.94, + "reward": 1.456288456916809, + "reward_std": 0.40123605728149414, + "rewards/EvidenceFormat/mean": 0.8333333730697632, + "rewards/EvidenceFormat/std": 0.37719547748565674, + "rewards/EvidenceHallucination/mean": 0.08484998345375061, + "rewards/EvidenceHallucination/std": 0.19040553271770477, + "rewards/Evidence_Num_Record/mean": 2.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.4715656042099, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.38931843638420105, + "rewards/VideoAccuracy/std": 0.48434221744537354, + "step": 11, + "train_speed(iter/s)": 0.015617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/mean_length": 573.7142944335938, + "completions/min_length": 328.0, + "entropy/max": 1.7578125, + "entropy/mean": 0.6953125, + "entropy/min": 0.216796875, + "epoch": 0.012, + "grad_norm": 1.3300925852204148, + "kl": 0.004669189453125, + "learning_rate": 1.9999798600729064e-06, + "loss": 4.8115143727045506e-05, + "memory(GiB)": 144.94, + "reward": 1.4905434846878052, + "reward_std": 0.4766744375228882, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19081245362758636, + "rewards/EvidenceHallucination/std": 0.23859429359436035, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 2.3507728576660156, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4523809552192688, + "rewards/VideoAccuracy/std": 0.503760576248169, + "step": 12, + "train_speed(iter/s)": 0.015851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09523809523809523, + "completions/max_length": 2625.0, + "completions/mean_length": 737.5, + "completions/min_length": 278.0, + "entropy/max": 2.890625, + "entropy/mean": 0.56640625, + "entropy/min": 0.1083984375, + "epoch": 0.013, + "grad_norm": 1.3719241393661707, + "kl": 0.013916015625, + "learning_rate": 1.9999546853541726e-06, + "loss": 0.00014006666606292129, + "memory(GiB)": 144.94, + "reward": 1.2101057767868042, + "reward_std": 0.4757480025291443, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.09935478866100311, + "rewards/EvidenceHallucination/std": 0.226935476064682, + "rewards/Evidence_Num_Record/mean": 6.023809432983398, + "rewards/Evidence_Num_Record/std": 8.721960067749023, + "rewards/Format/mean": 0.9047619104385376, + "rewards/Format/std": 0.297101765871048, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.23785389959812164, + "rewards/VideoAccuracy/std": 0.43064427375793457, + "step": 13, + "train_speed(iter/s)": 0.01551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 749.0, + "completions/mean_length": 433.3571472167969, + "completions/min_length": 252.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.390625, + "entropy/min": 0.248046875, + "epoch": 0.014, + "grad_norm": 1.3206631905316553, + "kl": 0.0034637451171875, + "learning_rate": 1.9999194411028592e-06, + "loss": 3.436854967731051e-05, + "memory(GiB)": 144.94, + "reward": 1.7792860269546509, + "reward_std": 0.19403386116027832, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.22832076251506805, + "rewards/EvidenceHallucination/std": 0.28095993399620056, + "rewards/Evidence_Num_Record/mean": 2.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.7948732376098633, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.6217170357704163, + "rewards/VideoAccuracy/std": 0.5551446676254272, + "step": 14, + "train_speed(iter/s)": 0.016096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1776.0, + "completions/mean_length": 582.1428833007812, + "completions/min_length": 353.0, + "entropy/max": 1.90625, + "entropy/mean": 0.55078125, + "entropy/min": 0.1572265625, + "epoch": 0.015, + "grad_norm": 1.1286443819444592, + "kl": 0.003997802734375, + "learning_rate": 1.9998741276738752e-06, + "loss": 4.373884803499095e-05, + "memory(GiB)": 144.94, + "reward": 1.4701656103134155, + "reward_std": 0.29070937633514404, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19159749150276184, + "rewards/EvidenceHallucination/std": 0.29467591643333435, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 2.836320638656616, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.36517950892448425, + "rewards/VideoAccuracy/std": 0.4534320533275604, + "step": 15, + "train_speed(iter/s)": 0.016157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 638.0, + "completions/mean_length": 438.5, + "completions/min_length": 250.0, + "entropy/max": 0.625, + "entropy/mean": 0.3984375, + "entropy/min": 0.251953125, + "epoch": 0.016, + "grad_norm": 1.156287440614561, + "kl": 0.00811767578125, + "learning_rate": 1.9998187455235257e-06, + "loss": 8.322580833919346e-05, + "memory(GiB)": 144.94, + "reward": 1.3306825160980225, + "reward_std": 0.251572847366333, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.10579296946525574, + "rewards/EvidenceHallucination/std": 0.21332131326198578, + "rewards/Evidence_Num_Record/mean": 2.738095283508301, + "rewards/Evidence_Num_Record/std": 0.9642266035079956, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 16, + "train_speed(iter/s)": 0.015815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 568.7380981445312, + "completions/min_length": 274.0, + "entropy/max": 0.8359375, + "entropy/mean": 0.380859375, + "entropy/min": 0.1259765625, + "epoch": 0.017, + "grad_norm": 1.4101739983074033, + "kl": 0.0037384033203125, + "learning_rate": 1.999753295209509e-06, + "loss": 3.897860005963594e-05, + "memory(GiB)": 145.69, + "reward": 1.7066527605056763, + "reward_std": 0.3545313775539398, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1598617285490036, + "rewards/EvidenceHallucination/std": 0.2706254720687866, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 6.809205055236816, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6318230628967285, + "rewards/VideoAccuracy/std": 0.5390254259109497, + "step": 17, + "train_speed(iter/s)": 0.016041 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1169.0, + "completions/mean_length": 587.9285888671875, + "completions/min_length": 293.0, + "entropy/max": 1.0, + "entropy/mean": 0.384765625, + "entropy/min": 0.1318359375, + "epoch": 0.018, + "grad_norm": 1.2248047866328504, + "kl": 0.0038299560546875, + "learning_rate": 1.999677777390909e-06, + "loss": 3.880564327118918e-05, + "memory(GiB)": 145.69, + "reward": 1.6634336709976196, + "reward_std": 0.4180944859981537, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22601404786109924, + "rewards/EvidenceHallucination/std": 0.30248087644577026, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.9169965982437134, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.48965945839881897, + "rewards/VideoAccuracy/std": 0.43758147954940796, + "step": 18, + "train_speed(iter/s)": 0.016317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 688.0, + "completions/mean_length": 464.0952453613281, + "completions/min_length": 233.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.451171875, + "entropy/min": 0.28125, + "epoch": 0.019, + "grad_norm": 1.279202550632437, + "kl": 0.007568359375, + "learning_rate": 1.999592192828189e-06, + "loss": 7.748680945951492e-05, + "memory(GiB)": 145.69, + "reward": 1.440986156463623, + "reward_std": 0.35693225264549255, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18112100660800934, + "rewards/EvidenceHallucination/std": 0.2568683624267578, + "rewards/Evidence_Num_Record/mean": 3.095238208770752, + "rewards/Evidence_Num_Record/std": 0.9830148220062256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4047619104385376, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 19, + "train_speed(iter/s)": 0.016439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 502.9761962890625, + "completions/min_length": 258.0, + "entropy/max": 1.484375, + "entropy/mean": 0.46484375, + "entropy/min": 0.10693359375, + "epoch": 0.02, + "grad_norm": 1.3709799718611808, + "kl": 0.00537109375, + "learning_rate": 1.999496542383185e-06, + "loss": 5.439633605419658e-05, + "memory(GiB)": 145.69, + "reward": 1.2718842029571533, + "reward_std": 0.4565136432647705, + "rewards/EvidenceFormat/mean": 0.8690476417541504, + "rewards/EvidenceFormat/std": 0.332388311624527, + "rewards/EvidenceHallucination/mean": 0.11449707299470901, + "rewards/EvidenceHallucination/std": 0.2324623316526413, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 4.580674648284912, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.32636573910713196, + "rewards/VideoAccuracy/std": 0.42334258556365967, + "step": 20, + "train_speed(iter/s)": 0.016233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 834.0, + "completions/mean_length": 527.3333129882812, + "completions/min_length": 279.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.337890625, + "entropy/min": 0.2041015625, + "epoch": 0.021, + "grad_norm": 1.3578766651839085, + "kl": 0.005828857421875, + "learning_rate": 1.9993908270190957e-06, + "loss": 6.010277138557285e-05, + "memory(GiB)": 145.74, + "reward": 2.1086716651916504, + "reward_std": 0.2845557630062103, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.2609308958053589, + "rewards/EvidenceHallucination/std": 0.28345152735710144, + "rewards/Evidence_Num_Record/mean": 2.904762029647827, + "rewards/Evidence_Num_Record/std": 1.0548268556594849, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9285714626312256, + "rewards/HonestTime/std": 0.26066118478775024, + "rewards/VideoAccuracy/mean": 0.8826761245727539, + "rewards/VideoAccuracy/std": 0.339295893907547, + "step": 21, + "train_speed(iter/s)": 0.016293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/mean_length": 494.26190185546875, + "completions/min_length": 311.0, + "entropy/max": 1.5625, + "entropy/mean": 0.59375, + "entropy/min": 0.302734375, + "epoch": 0.022, + "grad_norm": 1.253736043947575, + "kl": 0.00787353515625, + "learning_rate": 1.9992750478004735e-06, + "loss": 8.163228631019592e-05, + "memory(GiB)": 145.74, + "reward": 1.3902297019958496, + "reward_std": 0.36313265562057495, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.10591033846139908, + "rewards/EvidenceHallucination/std": 0.22784608602523804, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 2.2285237312316895, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.380952388048172, + "rewards/VideoAccuracy/std": 0.4915074110031128, + "step": 22, + "train_speed(iter/s)": 0.016359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 735.0, + "completions/mean_length": 442.5952453613281, + "completions/min_length": 244.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.412109375, + "entropy/min": 0.228515625, + "epoch": 0.023, + "grad_norm": 1.3193304711961666, + "kl": 0.01055908203125, + "learning_rate": 1.999149205893214e-06, + "loss": 0.00010886572999879718, + "memory(GiB)": 145.74, + "reward": 1.3971387147903442, + "reward_std": 0.39149364829063416, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08098088949918747, + "rewards/EvidenceHallucination/std": 0.20118315517902374, + "rewards/Evidence_Num_Record/mean": 2.952380895614624, + "rewards/Evidence_Num_Record/std": 0.9865530133247375, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.38094261288642883, + "rewards/VideoAccuracy/std": 0.4571596384048462, + "step": 23, + "train_speed(iter/s)": 0.016467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 803.0, + "completions/mean_length": 468.4761962890625, + "completions/min_length": 305.0, + "entropy/max": 0.5625, + "entropy/mean": 0.357421875, + "entropy/min": 0.1640625, + "epoch": 0.024, + "grad_norm": 1.0831692513277686, + "kl": 0.00848388671875, + "learning_rate": 1.9990133025645437e-06, + "loss": 8.65261972649023e-05, + "memory(GiB)": 145.74, + "reward": 1.7580333948135376, + "reward_std": 0.19246236979961395, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.19589346647262573, + "rewards/EvidenceHallucination/std": 0.2664523124694824, + "rewards/Evidence_Num_Record/mean": 2.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7419721484184265, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5476190447807312, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.6212357878684998, + "rewards/VideoAccuracy/std": 0.5090880990028381, + "step": 24, + "train_speed(iter/s)": 0.016795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/mean_length": 544.3333129882812, + "completions/min_length": 307.0, + "entropy/max": 1.09375, + "entropy/mean": 0.53515625, + "entropy/min": 0.228515625, + "epoch": 0.025, + "grad_norm": 1.1987348090196133, + "kl": 0.00848388671875, + "learning_rate": 1.998867339183008e-06, + "loss": 8.686093497090042e-05, + "memory(GiB)": 145.74, + "reward": 1.6837431192398071, + "reward_std": 0.20683422684669495, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22472654283046722, + "rewards/EvidenceHallucination/std": 0.27752161026000977, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 1.2742421627044678, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5721309781074524, + "rewards/VideoAccuracy/std": 0.4405735433101654, + "step": 25, + "train_speed(iter/s)": 0.016877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 483.4285888671875, + "completions/min_length": 285.0, + "entropy/max": 2.34375, + "entropy/mean": 0.5234375, + "entropy/min": 0.10595703125, + "epoch": 0.026, + "grad_norm": 1.2446589358643094, + "kl": 0.01214599609375, + "learning_rate": 1.998711317218456e-06, + "loss": 0.0001250980276381597, + "memory(GiB)": 145.74, + "reward": 1.3574994802474976, + "reward_std": 0.29396677017211914, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.1208304911851883, + "rewards/EvidenceHallucination/std": 0.2214687168598175, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 4.737720489501953, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496562242507935, + "step": 26, + "train_speed(iter/s)": 0.016664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/mean_length": 478.0, + "completions/min_length": 301.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.396484375, + "entropy/min": 0.2001953125, + "epoch": 0.027, + "grad_norm": 1.1043514885610453, + "kl": 0.01055908203125, + "learning_rate": 1.9985452382420274e-06, + "loss": 0.00010822327749338001, + "memory(GiB)": 145.74, + "reward": 1.2988076210021973, + "reward_std": 0.3214387893676758, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08240236341953278, + "rewards/EvidenceHallucination/std": 0.2126241773366928, + "rewards/Evidence_Num_Record/mean": 2.857142925262451, + "rewards/Evidence_Num_Record/std": 1.1384934186935425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.2156604379415512, + "rewards/VideoAccuracy/std": 0.42541253566741943, + "step": 27, + "train_speed(iter/s)": 0.016879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 611.857177734375, + "completions/min_length": 318.0, + "entropy/max": 2.25, + "entropy/mean": 0.5390625, + "entropy/min": 0.193359375, + "epoch": 0.028, + "grad_norm": 0.9921823880704781, + "kl": 0.006317138671875, + "learning_rate": 1.9983691039261353e-06, + "loss": 6.70288791297935e-05, + "memory(GiB)": 145.75, + "reward": 1.7207821607589722, + "reward_std": 0.23402933776378632, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24490571022033691, + "rewards/EvidenceHallucination/std": 0.3444436490535736, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 3.415905237197876, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.5952380895614624, + "rewards/HonestTime/std": 0.49679574370384216, + "rewards/VideoAccuracy/mean": 0.5646581053733826, + "rewards/VideoAccuracy/std": 0.4903152585029602, + "step": 28, + "train_speed(iter/s)": 0.016718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1589.0, + "completions/mean_length": 520.952392578125, + "completions/min_length": 302.0, + "entropy/max": 1.8125, + "entropy/mean": 0.546875, + "entropy/min": 0.2451171875, + "epoch": 0.029, + "grad_norm": 1.1703832973302537, + "kl": 0.01055908203125, + "learning_rate": 1.998182916044451e-06, + "loss": 0.00010844170901691541, + "memory(GiB)": 145.75, + "reward": 1.4587047100067139, + "reward_std": 0.4036465585231781, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1506655514240265, + "rewards/EvidenceHallucination/std": 0.22032076120376587, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 1.656998634338379, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4285714328289032, + "rewards/VideoAccuracy/std": 0.5008702874183655, + "step": 29, + "train_speed(iter/s)": 0.016683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.14285714285714285, + "completions/max_length": 2625.0, + "completions/mean_length": 753.0714721679688, + "completions/min_length": 249.0, + "entropy/max": 0.87890625, + "entropy/mean": 0.384765625, + "entropy/min": 0.07421875, + "epoch": 0.03, + "grad_norm": 1.2648430606097252, + "kl": 0.2373046875, + "learning_rate": 1.9979866764718843e-06, + "loss": 0.0022539356723427773, + "memory(GiB)": 145.75, + "reward": 1.3076739311218262, + "reward_std": 0.31140047311782837, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.09440181404352188, + "rewards/EvidenceHallucination/std": 0.1834997832775116, + "rewards/Evidence_Num_Record/mean": 7.476190567016602, + "rewards/Evidence_Num_Record/std": 10.572025299072266, + "rewards/Format/mean": 0.8571428656578064, + "rewards/Format/std": 0.3541688024997711, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.360222190618515, + "rewards/VideoAccuracy/std": 0.4654003381729126, + "step": 30, + "train_speed(iter/s)": 0.016483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 791.0, + "completions/mean_length": 504.4761962890625, + "completions/min_length": 305.0, + "entropy/max": 0.546875, + "entropy/mean": 0.322265625, + "entropy/min": 0.1513671875, + "epoch": 0.031, + "grad_norm": 1.1330015201937758, + "kl": 0.01275634765625, + "learning_rate": 1.997780387184565e-06, + "loss": 0.0001289858773816377, + "memory(GiB)": 145.75, + "reward": 2.1825642585754395, + "reward_std": 0.20884746313095093, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2839708924293518, + "rewards/EvidenceHallucination/std": 0.2942700684070587, + "rewards/Evidence_Num_Record/mean": 3.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.8323454856872559, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.9352939128875732, + "rewards/VideoAccuracy/std": 0.22921568155288696, + "step": 31, + "train_speed(iter/s)": 0.016511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2567.0, + "completions/mean_length": 571.6190795898438, + "completions/min_length": 273.0, + "entropy/max": 1.5390625, + "entropy/mean": 0.60546875, + "entropy/min": 0.09423828125, + "epoch": 0.032, + "grad_norm": 0.9103830840092558, + "kl": 0.01263427734375, + "learning_rate": 1.997564050259824e-06, + "loss": 0.00013467957614921033, + "memory(GiB)": 145.75, + "reward": 1.2602533102035522, + "reward_std": 0.2785269618034363, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11079034209251404, + "rewards/EvidenceHallucination/std": 0.22842200100421906, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 5.751180171966553, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2380952388048172, + "rewards/VideoAccuracy/std": 0.43108054995536804, + "step": 32, + "train_speed(iter/s)": 0.016409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07142857142857142, + "completions/max_length": 2625.0, + "completions/mean_length": 679.0, + "completions/min_length": 292.0, + "entropy/max": 0.7421875, + "entropy/mean": 0.34765625, + "entropy/min": 0.09521484375, + "epoch": 0.033, + "grad_norm": 1.1513595208744345, + "kl": 0.3828125, + "learning_rate": 1.997337667876172e-06, + "loss": 0.0021276986226439476, + "memory(GiB)": 145.75, + "reward": 1.185469388961792, + "reward_std": 0.4486236274242401, + "rewards/EvidenceFormat/mean": 0.8809524178504944, + "rewards/EvidenceFormat/std": 0.32777005434036255, + "rewards/EvidenceHallucination/mean": 0.11636475473642349, + "rewards/EvidenceHallucination/std": 0.25253647565841675, + "rewards/Evidence_Num_Record/mean": 4.857142925262451, + "rewards/Evidence_Num_Record/std": 7.579083442687988, + "rewards/Format/mean": 0.9285714626312256, + "rewards/Format/std": 0.26066118478775024, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2574344873428345, + "rewards/VideoAccuracy/std": 0.42979881167411804, + "step": 33, + "train_speed(iter/s)": 0.016317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/mean_length": 494.26190185546875, + "completions/min_length": 267.0, + "entropy/max": 1.1484375, + "entropy/mean": 0.37890625, + "entropy/min": 0.1552734375, + "epoch": 0.034, + "grad_norm": 1.0925109804515087, + "kl": 0.018310546875, + "learning_rate": 1.9971012423132772e-06, + "loss": 0.00018710496078711003, + "memory(GiB)": 145.75, + "reward": 1.7562119960784912, + "reward_std": 0.12298288941383362, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19665543735027313, + "rewards/EvidenceHallucination/std": 0.269692063331604, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 1.064690351486206, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.5883094072341919, + "rewards/VideoAccuracy/std": 0.4778609871864319, + "step": 34, + "train_speed(iter/s)": 0.016519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 869.0, + "completions/mean_length": 510.452392578125, + "completions/min_length": 327.0, + "entropy/max": 1.046875, + "entropy/mean": 0.58203125, + "entropy/min": 0.1787109375, + "epoch": 0.035, + "grad_norm": 1.248908336888557, + "kl": 0.01300048828125, + "learning_rate": 1.9968547759519425e-06, + "loss": 0.00013340359146241099, + "memory(GiB)": 145.75, + "reward": 1.4004958868026733, + "reward_std": 0.34659308195114136, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1086525246500969, + "rewards/EvidenceHallucination/std": 0.21830326318740845, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.8850939869880676, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.31209874153137207, + "rewards/VideoAccuracy/std": 0.41593098640441895, + "step": 35, + "train_speed(iter/s)": 0.016603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 951.0, + "completions/mean_length": 431.5714416503906, + "completions/min_length": 267.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.361328125, + "entropy/min": 0.181640625, + "epoch": 0.036, + "grad_norm": 1.2278684525023777, + "kl": 0.0172119140625, + "learning_rate": 1.9965982712740806e-06, + "loss": 0.0001762424799380824, + "memory(GiB)": 145.75, + "reward": 1.3322027921676636, + "reward_std": 0.3921200931072235, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1133946031332016, + "rewards/EvidenceHallucination/std": 0.22268490493297577, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 1.5151193141937256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 36, + "train_speed(iter/s)": 0.01668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/mean_length": 410.76190185546875, + "completions/min_length": 246.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.330078125, + "entropy/min": 0.166015625, + "epoch": 0.037, + "grad_norm": 1.210366128733837, + "kl": 0.017578125, + "learning_rate": 1.996331730862691e-06, + "loss": 0.00018034478125628084, + "memory(GiB)": 145.75, + "reward": 1.5425349473953247, + "reward_std": 0.16982388496398926, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2175394743680954, + "rewards/EvidenceHallucination/std": 0.31620246171951294, + "rewards/Evidence_Num_Record/mean": 2.952380895614624, + "rewards/Evidence_Num_Record/std": 1.8206455707550049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.43236052989959717, + "rewards/VideoAccuracy/std": 0.4914357364177704, + "step": 37, + "train_speed(iter/s)": 0.016778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 926.0, + "completions/mean_length": 509.21429443359375, + "completions/min_length": 295.0, + "entropy/max": 0.8828125, + "entropy/mean": 0.412109375, + "entropy/min": 0.1279296875, + "epoch": 0.038, + "grad_norm": 1.0799898044921437, + "kl": 0.01458740234375, + "learning_rate": 1.996055157401834e-06, + "loss": 0.00014850683510303497, + "memory(GiB)": 145.75, + "reward": 1.6375964879989624, + "reward_std": 0.2798987627029419, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.15529486536979675, + "rewards/EvidenceHallucination/std": 0.27660584449768066, + "rewards/Evidence_Num_Record/mean": 3.2142858505249023, + "rewards/Evidence_Num_Record/std": 0.8981204628944397, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4732043147087097, + "rewards/VideoAccuracy/std": 0.43239283561706543, + "step": 38, + "train_speed(iter/s)": 0.016841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 691.0, + "completions/mean_length": 413.73809814453125, + "completions/min_length": 273.0, + "entropy/max": 1.5234375, + "entropy/mean": 0.51171875, + "entropy/min": 0.16796875, + "epoch": 0.039, + "grad_norm": 1.360366339394654, + "kl": 0.0220947265625, + "learning_rate": 1.9957685536765995e-06, + "loss": 0.00022746861213818192, + "memory(GiB)": 145.75, + "reward": 1.3803130388259888, + "reward_std": 0.4381098747253418, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11585082858800888, + "rewards/EvidenceHallucination/std": 0.1961347460746765, + "rewards/Evidence_Num_Record/mean": 2.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.8611501455307007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496559262275696, + "step": 39, + "train_speed(iter/s)": 0.01702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 511.0, + "completions/mean_length": 344.73809814453125, + "completions/min_length": 247.0, + "entropy/max": 0.88671875, + "entropy/mean": 0.404296875, + "entropy/min": 0.2314453125, + "epoch": 0.04, + "grad_norm": 1.4382404832091151, + "kl": 0.0233154296875, + "learning_rate": 1.9954719225730845e-06, + "loss": 0.00023420357319992036, + "memory(GiB)": 145.75, + "reward": 1.3400732278823853, + "reward_std": 0.3356240689754486, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.17973728477954865, + "rewards/EvidenceHallucination/std": 0.31203538179397583, + "rewards/Evidence_Num_Record/mean": 2.357142925262451, + "rewards/Evidence_Num_Record/std": 0.5768471360206604, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.30412575602531433, + "rewards/VideoAccuracy/std": 0.44932904839515686, + "step": 40, + "train_speed(iter/s)": 0.016963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 866.0, + "completions/mean_length": 505.71429443359375, + "completions/min_length": 285.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.31640625, + "entropy/min": 0.1474609375, + "epoch": 0.041, + "grad_norm": 1.2328074341784987, + "kl": 0.0166015625, + "learning_rate": 1.995165267078361e-06, + "loss": 0.00016819580923765898, + "memory(GiB)": 145.75, + "reward": 1.7048735618591309, + "reward_std": 0.33454629778862, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24886250495910645, + "rewards/EvidenceHallucination/std": 0.35649824142456055, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.9833101630210876, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.4598630368709564, + "rewards/VideoAccuracy/std": 0.45265597105026245, + "step": 41, + "train_speed(iter/s)": 0.017079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 678.0, + "completions/mean_length": 404.4047546386719, + "completions/min_length": 301.0, + "entropy/max": 1.2421875, + "entropy/mean": 0.58984375, + "entropy/min": 0.2216796875, + "epoch": 0.042, + "grad_norm": 1.4974546558176582, + "kl": 0.0262451171875, + "learning_rate": 1.994848590280447e-06, + "loss": 0.0002636639983393252, + "memory(GiB)": 145.75, + "reward": 1.4940990209579468, + "reward_std": 0.4433921277523041, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20859037339687347, + "rewards/EvidenceHallucination/std": 0.2495918869972229, + "rewards/Evidence_Num_Record/mean": 2.904762029647827, + "rewards/Evidence_Num_Record/std": 0.9055256247520447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4523809552192688, + "rewards/VideoAccuracy/std": 0.503760576248169, + "step": 42, + "train_speed(iter/s)": 0.017094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 804.0, + "completions/mean_length": 391.0952453613281, + "completions/min_length": 230.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.36328125, + "entropy/min": 0.2001953125, + "epoch": 0.043, + "grad_norm": 1.178164764207004, + "kl": 0.0277099609375, + "learning_rate": 1.994521895368273e-06, + "loss": 0.000281293730949983, + "memory(GiB)": 145.75, + "reward": 1.5839663743972778, + "reward_std": 0.2064477503299713, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1908642202615738, + "rewards/EvidenceHallucination/std": 0.2820035517215729, + "rewards/Evidence_Num_Record/mean": 2.904762029647827, + "rewards/Evidence_Num_Record/std": 1.0777013301849365, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5457934737205505, + "rewards/VideoAccuracy/std": 0.5020904541015625, + "step": 43, + "train_speed(iter/s)": 0.017206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/mean_length": 396.23809814453125, + "completions/min_length": 269.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.376953125, + "entropy/min": 0.189453125, + "epoch": 0.044, + "grad_norm": 1.480695163848795, + "kl": 0.0230712890625, + "learning_rate": 1.9941851856316543e-06, + "loss": 0.00023624445020686835, + "memory(GiB)": 145.75, + "reward": 1.9166960716247559, + "reward_std": 0.23344223201274872, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.32448601722717285, + "rewards/EvidenceHallucination/std": 0.3242557942867279, + "rewards/Evidence_Num_Record/mean": 2.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.6866910457611084, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.730370283126831, + "rewards/VideoAccuracy/std": 0.39827626943588257, + "step": 44, + "train_speed(iter/s)": 0.01731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 625.0, + "completions/mean_length": 414.5714416503906, + "completions/min_length": 265.0, + "entropy/max": 0.9453125, + "entropy/mean": 0.451171875, + "entropy/min": 0.1806640625, + "epoch": 0.045, + "grad_norm": 1.3572922073713911, + "kl": 0.02294921875, + "learning_rate": 1.993838464461254e-06, + "loss": 0.0002286377566633746, + "memory(GiB)": 145.75, + "reward": 1.5818372964859009, + "reward_std": 0.3606022596359253, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24391533434391022, + "rewards/EvidenceHallucination/std": 0.3106663227081299, + "rewards/Evidence_Num_Record/mean": 2.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7419722080230713, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.46638762950897217, + "rewards/VideoAccuracy/std": 0.4565890431404114, + "step": 45, + "train_speed(iter/s)": 0.017377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 519.0, + "completions/mean_length": 357.0952453613281, + "completions/min_length": 270.0, + "entropy/max": 0.796875, + "entropy/mean": 0.3984375, + "entropy/min": 0.2255859375, + "epoch": 0.046, + "grad_norm": 1.5353899061823615, + "kl": 0.034912109375, + "learning_rate": 1.9934817353485502e-06, + "loss": 0.00035044411197304726, + "memory(GiB)": 145.75, + "reward": 1.6934123039245605, + "reward_std": 0.4521592855453491, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.26005518436431885, + "rewards/EvidenceHallucination/std": 0.3091502785682678, + "rewards/Evidence_Num_Record/mean": 2.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.8333914279937744, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6414012908935547, + "rewards/VideoAccuracy/std": 0.4838889539241791, + "step": 46, + "train_speed(iter/s)": 0.01738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 512.0, + "completions/mean_length": 349.3571472167969, + "completions/min_length": 237.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.3515625, + "entropy/min": 0.201171875, + "epoch": 0.047, + "grad_norm": 1.3887883228401308, + "kl": 0.0296630859375, + "learning_rate": 1.993115001885801e-06, + "loss": 0.00029792386339977384, + "memory(GiB)": 145.75, + "reward": 1.5804702043533325, + "reward_std": 0.2644038200378418, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20623116195201874, + "rewards/EvidenceHallucination/std": 0.3177759349346161, + "rewards/Evidence_Num_Record/mean": 2.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.7635724544525146, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4725571274757385, + "rewards/VideoAccuracy/std": 0.4981401860713959, + "step": 47, + "train_speed(iter/s)": 0.017459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 972.0, + "completions/mean_length": 460.0714416503906, + "completions/min_length": 264.0, + "entropy/max": 0.9609375, + "entropy/mean": 0.404296875, + "entropy/min": 0.189453125, + "epoch": 0.048, + "grad_norm": 1.3369684895102307, + "kl": 0.02197265625, + "learning_rate": 1.9927382677660083e-06, + "loss": 0.00022233667550608516, + "memory(GiB)": 145.75, + "reward": 1.6147117614746094, + "reward_std": 0.37551945447921753, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19136324524879456, + "rewards/EvidenceHallucination/std": 0.29685333371162415, + "rewards/Evidence_Num_Record/mean": 2.904762029647827, + "rewards/Evidence_Num_Record/std": 0.9055256843566895, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.44310569763183594, + "rewards/VideoAccuracy/std": 0.4097944498062134, + "step": 48, + "train_speed(iter/s)": 0.017496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 466.0, + "completions/mean_length": 331.6428527832031, + "completions/min_length": 245.0, + "entropy/max": 1.1875, + "entropy/mean": 0.470703125, + "entropy/min": 0.2734375, + "epoch": 0.049, + "grad_norm": 1.4428138106775554, + "kl": 0.03515625, + "learning_rate": 1.992351536782881e-06, + "loss": 0.0003579538897611201, + "memory(GiB)": 145.75, + "reward": 1.62581205368042, + "reward_std": 0.28599339723587036, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27191683650016785, + "rewards/EvidenceHallucination/std": 0.2856244444847107, + "rewards/Evidence_Num_Record/mean": 2.261904716491699, + "rewards/Evidence_Num_Record/std": 0.49679577350616455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5714285969734192, + "rewards/VideoAccuracy/std": 0.5008703470230103, + "step": 49, + "train_speed(iter/s)": 0.017508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 756.0, + "completions/mean_length": 333.69049072265625, + "completions/min_length": 250.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.380859375, + "entropy/min": 0.236328125, + "epoch": 0.05, + "grad_norm": 1.473115622410385, + "kl": 0.033447265625, + "learning_rate": 1.991954812830795e-06, + "loss": 0.00033629988320171833, + "memory(GiB)": 145.75, + "reward": 1.46311616897583, + "reward_std": 0.3826943039894104, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21974819898605347, + "rewards/EvidenceHallucination/std": 0.29454198479652405, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.2971017360687256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4191664755344391, + "rewards/VideoAccuracy/std": 0.4755955636501312, + "step": 50, + "train_speed(iter/s)": 0.017564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 577.0, + "completions/mean_length": 408.4761962890625, + "completions/min_length": 272.0, + "entropy/max": 0.4921875, + "entropy/mean": 0.310546875, + "entropy/min": 0.18359375, + "epoch": 0.051, + "grad_norm": 0.9285880403146588, + "kl": 0.0279541015625, + "learning_rate": 1.991548099904757e-06, + "loss": 0.0006827338947914541, + "memory(GiB)": 145.75, + "reward": 1.8026541471481323, + "reward_std": 0.11613352596759796, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30948394536972046, + "rewards/EvidenceHallucination/std": 0.3458852767944336, + "rewards/Evidence_Num_Record/mean": 2.642857074737549, + "rewards/Evidence_Num_Record/std": 0.5328903794288635, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5407573580741882, + "rewards/VideoAccuracy/std": 0.4804791510105133, + "step": 51, + "train_speed(iter/s)": 0.017656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 547.0, + "completions/mean_length": 340.8571472167969, + "completions/min_length": 240.0, + "entropy/max": 1.109375, + "entropy/mean": 0.5703125, + "entropy/min": 0.25, + "epoch": 0.052, + "grad_norm": 1.571728483796054, + "kl": 0.046630859375, + "learning_rate": 1.991131402100361e-06, + "loss": 0.0004689935012720525, + "memory(GiB)": 145.75, + "reward": 1.7956736087799072, + "reward_std": 0.3757343888282776, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40693891048431396, + "rewards/EvidenceHallucination/std": 0.2946487367153168, + "rewards/Evidence_Num_Record/mean": 2.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.5702658891677856, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7142857313156128, + "rewards/VideoAccuracy/std": 0.45722997188568115, + "step": 52, + "train_speed(iter/s)": 0.017687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 518.0, + "completions/mean_length": 345.8809509277344, + "completions/min_length": 274.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.369140625, + "entropy/min": 0.2265625, + "epoch": 0.053, + "grad_norm": 1.1307122156401865, + "kl": 0.036865234375, + "learning_rate": 1.9907047236137496e-06, + "loss": 0.00037122820504009724, + "memory(GiB)": 145.75, + "reward": 1.574357509613037, + "reward_std": 0.19801990687847137, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.26589876413345337, + "rewards/EvidenceHallucination/std": 0.3213001489639282, + "rewards/Evidence_Num_Record/mean": 2.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.32777008414268494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5211776494979858, + "rewards/VideoAccuracy/std": 0.5030077695846558, + "step": 53, + "train_speed(iter/s)": 0.017759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 867.0, + "completions/mean_length": 401.66668701171875, + "completions/min_length": 270.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.337890625, + "entropy/min": 0.2001953125, + "epoch": 0.054, + "grad_norm": 1.3118260485938609, + "kl": 0.032470703125, + "learning_rate": 1.99026806874157e-06, + "loss": 0.00032804696820676327, + "memory(GiB)": 145.75, + "reward": 1.6673247814178467, + "reward_std": 0.34364157915115356, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20881755650043488, + "rewards/EvidenceHallucination/std": 0.2592983841896057, + "rewards/Evidence_Num_Record/mean": 2.6190476417541504, + "rewards/Evidence_Num_Record/std": 1.0809296369552612, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4922279119491577, + "rewards/VideoAccuracy/std": 0.4640647768974304, + "step": 54, + "train_speed(iter/s)": 0.017802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 534.0, + "completions/mean_length": 380.3095397949219, + "completions/min_length": 237.0, + "entropy/max": 0.953125, + "entropy/mean": 0.49609375, + "entropy/min": 0.17578125, + "epoch": 0.055, + "grad_norm": 1.1253343964624638, + "kl": 0.035400390625, + "learning_rate": 1.9898214418809326e-06, + "loss": 0.00035857115290127695, + "memory(GiB)": 145.75, + "reward": 1.5038208961486816, + "reward_std": 0.19950318336486816, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24851542711257935, + "rewards/EvidenceHallucination/std": 0.3577192723751068, + "rewards/Evidence_Num_Record/mean": 2.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.5008703470230103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.38745105266571045, + "rewards/VideoAccuracy/std": 0.5098853707313538, + "step": 55, + "train_speed(iter/s)": 0.017879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 421.0, + "completions/mean_length": 303.4047546386719, + "completions/min_length": 223.0, + "entropy/max": 1.0390625, + "entropy/mean": 0.39453125, + "entropy/min": 0.1943359375, + "epoch": 0.056, + "grad_norm": 1.341577119659631, + "kl": 0.05078125, + "learning_rate": 1.9893648475293647e-06, + "loss": 0.0005097612738609314, + "memory(GiB)": 145.75, + "reward": 1.6940311193466187, + "reward_std": 0.35864514112472534, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3749173879623413, + "rewards/EvidenceHallucination/std": 0.32496383786201477, + "rewards/Evidence_Num_Record/mean": 2.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.6115421056747437, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6190476417541504, + "rewards/VideoAccuracy/std": 0.4915074408054352, + "step": 56, + "train_speed(iter/s)": 0.017889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 451.0, + "completions/mean_length": 304.9761962890625, + "completions/min_length": 192.0, + "entropy/max": 0.515625, + "entropy/mean": 0.341796875, + "entropy/min": 0.201171875, + "epoch": 0.057, + "grad_norm": 1.2402957624594078, + "kl": 0.0419921875, + "learning_rate": 1.9888982902847653e-06, + "loss": 0.0004201154224574566, + "memory(GiB)": 145.75, + "reward": 1.3021140098571777, + "reward_std": 0.13262939453125, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.10855767130851746, + "rewards/EvidenceHallucination/std": 0.21811561286449432, + "rewards/Evidence_Num_Record/mean": 2.2142858505249023, + "rewards/Evidence_Num_Record/std": 0.4703768193721771, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.21373586356639862, + "rewards/VideoAccuracy/std": 0.39836785197257996, + "step": 57, + "train_speed(iter/s)": 0.01797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 729.0, + "completions/mean_length": 451.3809509277344, + "completions/min_length": 285.0, + "entropy/max": 1.046875, + "entropy/mean": 0.44140625, + "entropy/min": 0.2080078125, + "epoch": 0.058, + "grad_norm": 1.1393164343744608, + "kl": 0.033447265625, + "learning_rate": 1.988421774845362e-06, + "loss": 0.0003366103337612003, + "memory(GiB)": 145.75, + "reward": 1.8692452907562256, + "reward_std": 0.24157139658927917, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3845154047012329, + "rewards/EvidenceHallucination/std": 0.34343352913856506, + "rewards/Evidence_Num_Record/mean": 2.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.7168942093849182, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6590089797973633, + "rewards/VideoAccuracy/std": 0.4387028217315674, + "step": 58, + "train_speed(iter/s)": 0.017979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 487.0, + "completions/mean_length": 327.5476379394531, + "completions/min_length": 212.0, + "entropy/max": 1.1015625, + "entropy/mean": 0.48828125, + "entropy/min": 0.181640625, + "epoch": 0.059, + "grad_norm": 1.3073129517153632, + "kl": 0.0498046875, + "learning_rate": 1.98793530600966e-06, + "loss": 0.000497853965498507, + "memory(GiB)": 145.75, + "reward": 1.3389884233474731, + "reward_std": 0.3814266622066498, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14732250571250916, + "rewards/EvidenceHallucination/std": 0.236972376704216, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 0.45468270778656006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 59, + "train_speed(iter/s)": 0.017988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 527.0, + "completions/mean_length": 327.0952453613281, + "completions/min_length": 240.0, + "entropy/max": 1.078125, + "entropy/mean": 0.47265625, + "entropy/min": 0.2236328125, + "epoch": 0.06, + "grad_norm": 1.4469148169383166, + "kl": 0.042236328125, + "learning_rate": 1.987438888676394e-06, + "loss": 0.00042862416012212634, + "memory(GiB)": 145.75, + "reward": 1.3566797971725464, + "reward_std": 0.32148292660713196, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18699973821640015, + "rewards/EvidenceHallucination/std": 0.300551176071167, + "rewards/Evidence_Num_Record/mean": 2.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.4371005594730377, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.31927984952926636, + "rewards/VideoAccuracy/std": 0.4577767848968506, + "step": 60, + "train_speed(iter/s)": 0.018037 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + "completions/mean_length": 406.8333435058594, + "completions/min_length": 238.0, + "entropy/max": 0.48046875, + "entropy/mean": 0.27734375, + "entropy/min": 0.19921875, + "epoch": 0.061, + "grad_norm": 1.3020082787985705, + "kl": 0.032958984375, + "learning_rate": 1.986932527844482e-06, + "loss": 0.0003307433507870883, + "memory(GiB)": 145.75, + "reward": 1.9191844463348389, + "reward_std": 0.3503304719924927, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35135334730148315, + "rewards/EvidenceHallucination/std": 0.33822157979011536, + "rewards/Evidence_Num_Record/mean": 2.642857074737549, + "rewards/Evidence_Num_Record/std": 0.7593780755996704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.648913562297821, + "rewards/VideoAccuracy/std": 0.41752249002456665, + "step": 61, + "train_speed(iter/s)": 0.018009 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 453.0, + "completions/mean_length": 331.8095397949219, + "completions/min_length": 231.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.50390625, + "entropy/min": 0.189453125, + "epoch": 0.062, + "grad_norm": 1.3181632186820587, + "kl": 0.053955078125, + "learning_rate": 1.9864162286129716e-06, + "loss": 0.0005426580901257694, + "memory(GiB)": 145.75, + "reward": 1.3668583631515503, + "reward_std": 0.26346227526664734, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16762493550777435, + "rewards/EvidenceHallucination/std": 0.2664393186569214, + "rewards/Evidence_Num_Record/mean": 2.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.48973196744918823, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 62, + "train_speed(iter/s)": 0.018081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 490.0, + "completions/mean_length": 308.5476379394531, + "completions/min_length": 222.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.36328125, + "entropy/min": 0.20703125, + "epoch": 0.063, + "grad_norm": 1.3916223637864948, + "kl": 0.0546875, + "learning_rate": 1.9858899961809902e-06, + "loss": 0.0005506295128725469, + "memory(GiB)": 145.75, + "reward": 1.3270175457000732, + "reward_std": 0.4147834777832031, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08936237543821335, + "rewards/EvidenceHallucination/std": 0.17755059897899628, + "rewards/Evidence_Num_Record/mean": 2.0238096714019775, + "rewards/Evidence_Num_Record/std": 0.26942533254623413, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.30914509296417236, + "rewards/VideoAccuracy/std": 0.46733471751213074, + "step": 63, + "train_speed(iter/s)": 0.018122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 519.0, + "completions/mean_length": 310.76190185546875, + "completions/min_length": 232.0, + "entropy/max": 0.392578125, + "entropy/mean": 0.298828125, + "entropy/min": 0.1513671875, + "epoch": 0.064, + "grad_norm": 1.096097392343389, + "kl": 0.0517578125, + "learning_rate": 1.985353835847693e-06, + "loss": 0.0007183492416515946, + "memory(GiB)": 145.75, + "reward": 1.6550796031951904, + "reward_std": 0.14945176243782043, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2174966037273407, + "rewards/EvidenceHallucination/std": 0.27638477087020874, + "rewards/Evidence_Num_Record/mean": 2.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.37719547748565674, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.47824692726135254, + "rewards/VideoAccuracy/std": 0.49298593401908875, + "step": 64, + "train_speed(iter/s)": 0.018253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 637.0, + "completions/mean_length": 397.3571472167969, + "completions/min_length": 282.0, + "entropy/max": 0.87890625, + "entropy/mean": 0.421875, + "entropy/min": 0.125, + "epoch": 0.065, + "grad_norm": 1.5461053818811703, + "kl": 0.0498046875, + "learning_rate": 1.984807753012208e-06, + "loss": 0.0005011714529246092, + "memory(GiB)": 145.75, + "reward": 1.7878233194351196, + "reward_std": 0.37429529428482056, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31716659665107727, + "rewards/EvidenceHallucination/std": 0.2837255001068115, + "rewards/Evidence_Num_Record/mean": 2.595238208770752, + "rewards/Evidence_Num_Record/std": 0.7669872045516968, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6577231287956238, + "rewards/VideoAccuracy/std": 0.4496304392814636, + "step": 65, + "train_speed(iter/s)": 0.018271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 454.0, + "completions/mean_length": 306.21429443359375, + "completions/min_length": 237.0, + "entropy/max": 1.0390625, + "entropy/mean": 0.373046875, + "entropy/min": 0.2177734375, + "epoch": 0.066, + "grad_norm": 1.5463420602194187, + "kl": 0.060302734375, + "learning_rate": 1.9842517531735837e-06, + "loss": 0.0006045004702173173, + "memory(GiB)": 145.75, + "reward": 1.6040095090866089, + "reward_std": 0.5427826642990112, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28338417410850525, + "rewards/EvidenceHallucination/std": 0.29940205812454224, + "rewards/Evidence_Num_Record/mean": 2.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.66999751329422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5473327040672302, + "rewards/VideoAccuracy/std": 0.5034978985786438, + "step": 66, + "train_speed(iter/s)": 0.018263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 404.0, + "completions/mean_length": 295.8095397949219, + "completions/min_length": 224.0, + "entropy/max": 0.625, + "entropy/mean": 0.357421875, + "entropy/min": 0.2080078125, + "epoch": 0.067, + "grad_norm": 1.391620079806127, + "kl": 0.055419921875, + "learning_rate": 1.983685841930732e-06, + "loss": 0.0005547074833884835, + "memory(GiB)": 145.75, + "reward": 1.4512310028076172, + "reward_std": 0.2663763165473938, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13462595641613007, + "rewards/EvidenceHallucination/std": 0.22502124309539795, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.43108054995536804, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3576390743255615, + "rewards/VideoAccuracy/std": 0.4690335988998413, + "step": 67, + "train_speed(iter/s)": 0.018329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 623.0, + "completions/mean_length": 384.21429443359375, + "completions/min_length": 227.0, + "entropy/max": 0.77734375, + "entropy/mean": 0.322265625, + "entropy/min": 0.1748046875, + "epoch": 0.068, + "grad_norm": 1.3718544211406185, + "kl": 0.042236328125, + "learning_rate": 1.983110024982373e-06, + "loss": 0.00042220597970299423, + "memory(GiB)": 145.75, + "reward": 1.8695082664489746, + "reward_std": 0.3609388768672943, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.25149857997894287, + "rewards/EvidenceHallucination/std": 0.26422175765037537, + "rewards/Evidence_Num_Record/mean": 2.5, + "rewards/Evidence_Num_Record/std": 0.5521576404571533, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6858752369880676, + "rewards/VideoAccuracy/std": 0.42166298627853394, + "step": 68, + "train_speed(iter/s)": 0.018371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 493.0, + "completions/mean_length": 318.5, + "completions/min_length": 224.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.458984375, + "entropy/min": 0.2412109375, + "epoch": 0.069, + "grad_norm": 1.693376453582021, + "kl": 0.06494140625, + "learning_rate": 1.982524308126977e-06, + "loss": 0.0006529532838612795, + "memory(GiB)": 145.75, + "reward": 1.678009033203125, + "reward_std": 0.360478013753891, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29480722546577454, + "rewards/EvidenceHallucination/std": 0.28128427267074585, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.37020254135131836, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6190476417541504, + "rewards/VideoAccuracy/std": 0.4915074408054352, + "step": 69, + "train_speed(iter/s)": 0.018359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 365.0, + "completions/mean_length": 288.69049072265625, + "completions/min_length": 156.0, + "entropy/max": 0.703125, + "entropy/mean": 0.341796875, + "entropy/min": 0.19921875, + "epoch": 0.07, + "grad_norm": 1.5434923760121793, + "kl": 0.061279296875, + "learning_rate": 1.9819286972627067e-06, + "loss": 0.0006184515659697354, + "memory(GiB)": 145.75, + "reward": 1.2753374576568604, + "reward_std": 0.37550923228263855, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08139073103666306, + "rewards/EvidenceHallucination/std": 0.19860804080963135, + "rewards/Evidence_Num_Record/mean": 1.9523810148239136, + "rewards/Evidence_Num_Record/std": 0.30860671401023865, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2590593695640564, + "rewards/VideoAccuracy/std": 0.41239312291145325, + "step": 70, + "train_speed(iter/s)": 0.018408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 758.0, + "completions/mean_length": 444.0238037109375, + "completions/min_length": 257.0, + "entropy/max": 0.431640625, + "entropy/mean": 0.279296875, + "entropy/min": 0.142578125, + "epoch": 0.071, + "grad_norm": 0.9259762128106337, + "kl": 0.044189453125, + "learning_rate": 1.981323198387356e-06, + "loss": 0.0008414517506025732, + "memory(GiB)": 145.75, + "reward": 1.4682765007019043, + "reward_std": 0.16010618209838867, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.04678576812148094, + "rewards/EvidenceHallucination/std": 0.15406948328018188, + "rewards/Evidence_Num_Record/mean": 3.0, + "rewards/Evidence_Num_Record/std": 1.0121216773986816, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.25891920924186707, + "rewards/VideoAccuracy/std": 0.34589600563049316, + "step": 71, + "train_speed(iter/s)": 0.018451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 403.0, + "completions/mean_length": 313.19049072265625, + "completions/min_length": 224.0, + "entropy/max": 0.8671875, + "entropy/mean": 0.44140625, + "entropy/min": 0.2490234375, + "epoch": 0.072, + "grad_norm": 1.370825290226045, + "kl": 0.06298828125, + "learning_rate": 1.9807078175982922e-06, + "loss": 0.0006318792584352195, + "memory(GiB)": 145.75, + "reward": 1.468855857849121, + "reward_std": 0.4081823527812958, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.14189793169498444, + "rewards/EvidenceHallucination/std": 0.1861451119184494, + "rewards/Evidence_Num_Record/mean": 2.0, + "rewards/Evidence_Num_Record/std": 0.44172608852386475, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4523809552192688, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 72, + "train_speed(iter/s)": 0.018458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 419.0, + "completions/mean_length": 308.21429443359375, + "completions/min_length": 242.0, + "entropy/max": 0.65625, + "entropy/mean": 0.376953125, + "entropy/min": 0.197265625, + "epoch": 0.073, + "grad_norm": 1.5900604917883046, + "kl": 0.06787109375, + "learning_rate": 1.980082561092393e-06, + "loss": 0.0006817152607254684, + "memory(GiB)": 145.75, + "reward": 1.5464859008789062, + "reward_std": 0.3614649176597595, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22563165426254272, + "rewards/EvidenceHallucination/std": 0.25571611523628235, + "rewards/Evidence_Num_Record/mean": 2.238095283508301, + "rewards/Evidence_Num_Record/std": 0.43108054995536804, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5013597011566162, + "rewards/VideoAccuracy/std": 0.501571536064148, + "step": 73, + "train_speed(iter/s)": 0.018492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 467.0, + "completions/mean_length": 319.3571472167969, + "completions/min_length": 214.0, + "entropy/max": 0.671875, + "entropy/mean": 0.3515625, + "entropy/min": 0.2373046875, + "epoch": 0.074, + "grad_norm": 1.3190473186336864, + "kl": 0.06787109375, + "learning_rate": 1.9794474351659853e-06, + "loss": 0.0010814255801960826, + "memory(GiB)": 145.75, + "reward": 1.4756110906600952, + "reward_std": 0.21122239530086517, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430335700511932, + "rewards/EvidenceHallucination/mean": 0.16264285147190094, + "rewards/EvidenceHallucination/std": 0.27389493584632874, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.5763435363769531, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3216537833213806, + "rewards/VideoAccuracy/std": 0.42325273156166077, + "step": 74, + "train_speed(iter/s)": 0.018551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 788.0, + "completions/mean_length": 394.26190185546875, + "completions/min_length": 182.0, + "entropy/max": 0.9609375, + "entropy/mean": 0.40625, + "entropy/min": 0.1435546875, + "epoch": 0.075, + "grad_norm": 0.8582498090559345, + "kl": 0.057373046875, + "learning_rate": 1.978802446214779e-06, + "loss": 0.0005849922308698297, + "memory(GiB)": 145.75, + "reward": 1.2947375774383545, + "reward_std": 0.20632633566856384, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.08700108528137207, + "rewards/EvidenceHallucination/std": 0.2174977958202362, + "rewards/Evidence_Num_Record/mean": 2.8809523582458496, + "rewards/Evidence_Num_Record/std": 1.253334641456604, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.2273373007774353, + "rewards/VideoAccuracy/std": 0.3822914659976959, + "step": 75, + "train_speed(iter/s)": 0.018504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 329.0, + "completions/mean_length": 274.6428527832031, + "completions/min_length": 197.0, + "entropy/max": 0.5, + "entropy/mean": 0.359375, + "entropy/min": 0.26953125, + "epoch": 0.076, + "grad_norm": 1.5982752636011046, + "kl": 0.08740234375, + "learning_rate": 1.9781476007338054e-06, + "loss": 0.0008751722052693367, + "memory(GiB)": 145.75, + "reward": 1.9944734573364258, + "reward_std": 0.2277480810880661, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45637980103492737, + "rewards/EvidenceHallucination/std": 0.28484517335891724, + "rewards/Evidence_Num_Record/mean": 2.047619104385376, + "rewards/Evidence_Num_Record/std": 0.30860671401023865, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9031975269317627, + "rewards/VideoAccuracy/std": 0.2695874869823456, + "step": 76, + "train_speed(iter/s)": 0.018539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 390.0, + "completions/mean_length": 283.5476379394531, + "completions/min_length": 214.0, + "entropy/max": 0.578125, + "entropy/mean": 0.37109375, + "entropy/min": 0.251953125, + "epoch": 0.077, + "grad_norm": 1.6176824224306654, + "kl": 0.07666015625, + "learning_rate": 1.9774829053173526e-06, + "loss": 0.0007775009144097567, + "memory(GiB)": 145.75, + "reward": 1.4460300207138062, + "reward_std": 0.2849999666213989, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.15240485966205597, + "rewards/EvidenceHallucination/std": 0.23497402667999268, + "rewards/Evidence_Num_Record/mean": 1.9523810148239136, + "rewards/Evidence_Num_Record/std": 0.4915074110031128, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3607870936393738, + "rewards/VideoAccuracy/std": 0.4723948538303375, + "step": 77, + "train_speed(iter/s)": 0.018647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 587.0, + "completions/mean_length": 399.4285888671875, + "completions/min_length": 225.0, + "entropy/max": 1.2109375, + "entropy/mean": 0.40234375, + "entropy/min": 0.1826171875, + "epoch": 0.078, + "grad_norm": 1.442821113130145, + "kl": 0.052490234375, + "learning_rate": 1.976808366658895e-06, + "loss": 0.0005323234363459051, + "memory(GiB)": 145.75, + "reward": 1.9828182458877563, + "reward_std": 0.2595849633216858, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3858642578125, + "rewards/EvidenceHallucination/std": 0.3123042583465576, + "rewards/Evidence_Num_Record/mean": 2.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.7501451373100281, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7723119854927063, + "rewards/VideoAccuracy/std": 0.389898419380188, + "step": 78, + "train_speed(iter/s)": 0.018659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 347.0, + "completions/mean_length": 268.0952453613281, + "completions/min_length": 203.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.3828125, + "entropy/min": 0.1904296875, + "epoch": 0.079, + "grad_norm": 1.864548819936298, + "kl": 0.09423828125, + "learning_rate": 1.97612399155103e-06, + "loss": 0.0009492395329289138, + "memory(GiB)": 145.75, + "reward": 1.645930528640747, + "reward_std": 0.5275153517723083, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.31298649311065674, + "rewards/EvidenceHallucination/std": 0.32048505544662476, + "rewards/Evidence_Num_Record/mean": 1.9523810148239136, + "rewards/Evidence_Num_Record/std": 0.37949779629707336, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 79, + "train_speed(iter/s)": 0.018674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 435.0, + "completions/mean_length": 273.0476379394531, + "completions/min_length": 162.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.34375, + "entropy/min": 0.173828125, + "epoch": 0.08, + "grad_norm": 1.5542895075602492, + "kl": 0.08349609375, + "learning_rate": 1.975429786885407e-06, + "loss": 0.0008524173754267395, + "memory(GiB)": 145.75, + "reward": 1.2154887914657593, + "reward_std": 0.21944718062877655, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.1446523219347, + "rewards/EvidenceHallucination/std": 0.29942378401756287, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.8207529187202454, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.19846321642398834, + "rewards/VideoAccuracy/std": 0.37335455417633057, + "step": 80, + "train_speed(iter/s)": 0.018726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 847.0, + "completions/mean_length": 450.452392578125, + "completions/min_length": 196.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.328125, + "entropy/min": 0.1787109375, + "epoch": 0.081, + "grad_norm": 1.3445666849751166, + "kl": 0.0595703125, + "learning_rate": 1.974725759652659e-06, + "loss": 0.0005987854674458504, + "memory(GiB)": 145.75, + "reward": 1.987047553062439, + "reward_std": 0.19900517165660858, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31228476762771606, + "rewards/EvidenceHallucination/std": 0.2700451910495758, + "rewards/Evidence_Num_Record/mean": 3.095238208770752, + "rewards/Evidence_Num_Record/std": 0.9578819274902344, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7245904803276062, + "rewards/VideoAccuracy/std": 0.3621458113193512, + "step": 81, + "train_speed(iter/s)": 0.018757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 404.0, + "completions/mean_length": 297.6190490722656, + "completions/min_length": 184.0, + "entropy/max": 1.5234375, + "entropy/mean": 0.51953125, + "entropy/min": 0.314453125, + "epoch": 0.082, + "grad_norm": 1.565173013639308, + "kl": 0.10791015625, + "learning_rate": 1.9740119169423336e-06, + "loss": 0.0010965773835778236, + "memory(GiB)": 145.75, + "reward": 1.8209408521652222, + "reward_std": 0.24913637340068817, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41422826051712036, + "rewards/EvidenceHallucination/std": 0.31709954142570496, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.37020260095596313, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.738095223903656, + "rewards/VideoAccuracy/std": 0.44500061869621277, + "step": 82, + "train_speed(iter/s)": 0.018742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 395.0, + "completions/mean_length": 273.5714416503906, + "completions/min_length": 144.0, + "entropy/max": 0.5, + "entropy/mean": 0.365234375, + "entropy/min": 0.23828125, + "epoch": 0.083, + "grad_norm": 1.9360121628140547, + "kl": 0.1171875, + "learning_rate": 1.9732882659428175e-06, + "loss": 0.0011830523144453764, + "memory(GiB)": 145.75, + "reward": 1.260207176208496, + "reward_std": 0.36909279227256775, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.18188481032848358, + "rewards/EvidenceHallucination/std": 0.33221206068992615, + "rewards/Evidence_Num_Record/mean": 2.047619104385376, + "rewards/Evidence_Num_Record/std": 0.4915074408054352, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2357349395751953, + "rewards/VideoAccuracy/std": 0.3935372829437256, + "step": 83, + "train_speed(iter/s)": 0.018691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 512.0, + "completions/mean_length": 304.952392578125, + "completions/min_length": 177.0, + "entropy/max": 0.435546875, + "entropy/mean": 0.32421875, + "entropy/min": 0.19921875, + "epoch": 0.084, + "grad_norm": 1.359891083571745, + "kl": 0.09423828125, + "learning_rate": 1.972554813941269e-06, + "loss": 0.0011465921998023987, + "memory(GiB)": 145.75, + "reward": 1.6613414287567139, + "reward_std": 0.07137398421764374, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.3027176856994629, + "rewards/EvidenceHallucination/std": 0.3304864466190338, + "rewards/Evidence_Num_Record/mean": 2.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.503760576248169, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.47936904430389404, + "rewards/VideoAccuracy/std": 0.4919584095478058, + "step": 84, + "train_speed(iter/s)": 0.018785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 644.0, + "completions/mean_length": 382.6190490722656, + "completions/min_length": 220.0, + "entropy/max": 1.1015625, + "entropy/mean": 0.421875, + "entropy/min": 0.19140625, + "epoch": 0.085, + "grad_norm": 1.5264792017573605, + "kl": 0.09130859375, + "learning_rate": 1.9718115683235415e-06, + "loss": 0.0009243786334991455, + "memory(GiB)": 145.75, + "reward": 1.7950348854064941, + "reward_std": 0.40462440252304077, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.327504962682724, + "rewards/EvidenceHallucination/std": 0.3095806837081909, + "rewards/Evidence_Num_Record/mean": 2.6666667461395264, + "rewards/Evidence_Num_Record/std": 1.0040568113327026, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6628671288490295, + "rewards/VideoAccuracy/std": 0.44367995858192444, + "step": 85, + "train_speed(iter/s)": 0.018814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 404.0, + "completions/mean_length": 280.73809814453125, + "completions/min_length": 217.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.38671875, + "entropy/min": 0.2216796875, + "epoch": 0.086, + "grad_norm": 1.3721674291805483, + "kl": 0.1298828125, + "learning_rate": 1.97105853657411e-06, + "loss": 0.0013156002387404442, + "memory(GiB)": 145.75, + "reward": 1.3735020160675049, + "reward_std": 0.39117431640625, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20084336400032043, + "rewards/EvidenceHallucination/std": 0.2974098324775696, + "rewards/Evidence_Num_Record/mean": 2.047619104385376, + "rewards/Evidence_Num_Record/std": 0.37949779629707336, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 86, + "train_speed(iter/s)": 0.018832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 354.0, + "completions/mean_length": 278.1190490722656, + "completions/min_length": 182.0, + "entropy/max": 0.75390625, + "entropy/mean": 0.341796875, + "entropy/min": 0.1787109375, + "epoch": 0.087, + "grad_norm": 1.4565366657438574, + "kl": 0.1318359375, + "learning_rate": 1.9702957262759963e-06, + "loss": 0.0013202999252825975, + "memory(GiB)": 145.75, + "reward": 1.636621356010437, + "reward_std": 0.2807752788066864, + "rewards/EvidenceFormat/mean": 0.9523809552192688, + "rewards/EvidenceFormat/std": 0.21554027497768402, + "rewards/EvidenceHallucination/mean": 0.3674306571483612, + "rewards/EvidenceHallucination/std": 0.35236856341362, + "rewards/Evidence_Num_Record/mean": 1.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.45276281237602234, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5202780961990356, + "rewards/VideoAccuracy/std": 0.48144081234931946, + "step": 87, + "train_speed(iter/s)": 0.018877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 780.0, + "completions/mean_length": 430.0952453613281, + "completions/min_length": 241.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.365234375, + "entropy/min": 0.177734375, + "epoch": 0.088, + "grad_norm": 1.4062133131420138, + "kl": 0.080078125, + "learning_rate": 1.969523145110691e-06, + "loss": 0.0008045134018175304, + "memory(GiB)": 145.75, + "reward": 1.762561321258545, + "reward_std": 0.39015036821365356, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2917018532752991, + "rewards/EvidenceHallucination/std": 0.3407047688961029, + "rewards/Evidence_Num_Record/mean": 2.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.8890219330787659, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5708876848220825, + "rewards/VideoAccuracy/std": 0.46675702929496765, + "step": 88, + "train_speed(iter/s)": 0.018891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 418.0, + "completions/mean_length": 283.6190490722656, + "completions/min_length": 219.0, + "entropy/max": 0.703125, + "entropy/mean": 0.4296875, + "entropy/min": 0.26953125, + "epoch": 0.089, + "grad_norm": 1.6626483004876464, + "kl": 0.1396484375, + "learning_rate": 1.9687408008580783e-06, + "loss": 0.0014032538747414947, + "memory(GiB)": 145.75, + "reward": 1.3591610193252563, + "reward_std": 0.44488558173179626, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18866242468357086, + "rewards/EvidenceHallucination/std": 0.30501607060432434, + "rewards/Evidence_Num_Record/mean": 2.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.26066118478775024, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 89, + "train_speed(iter/s)": 0.018762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 720.0, + "completions/mean_length": 286.1190490722656, + "completions/min_length": 183.0, + "entropy/max": 1.4296875, + "entropy/mean": 0.4921875, + "entropy/min": 0.255859375, + "epoch": 0.09, + "grad_norm": 1.3142514728000814, + "kl": 0.130859375, + "learning_rate": 1.967948701396356e-06, + "loss": 0.0013334897812455893, + "memory(GiB)": 145.75, + "reward": 1.1134485006332397, + "reward_std": 0.21092890202999115, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.05557645112276077, + "rewards/EvidenceHallucination/std": 0.17599347233772278, + "rewards/Evidence_Num_Record/mean": 1.8333333730697632, + "rewards/Evidence_Num_Record/std": 0.5372316837310791, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.11423792690038681, + "rewards/VideoAccuracy/std": 0.3155045509338379, + "step": 90, + "train_speed(iter/s)": 0.018816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 679.0, + "completions/mean_length": 405.3571472167969, + "completions/min_length": 206.0, + "entropy/max": 0.482421875, + "entropy/mean": 0.298828125, + "entropy/min": 0.1396484375, + "epoch": 0.091, + "grad_norm": 1.3160893508945921, + "kl": 0.07958984375, + "learning_rate": 1.967146854701957e-06, + "loss": 0.0008046379080042243, + "memory(GiB)": 145.75, + "reward": 2.143765449523926, + "reward_std": 0.13687437772750854, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4880090057849884, + "rewards/EvidenceHallucination/std": 0.30523109436035156, + "rewards/Evidence_Num_Record/mean": 2.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.8811485767364502, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8461634516716003, + "rewards/VideoAccuracy/std": 0.3223564326763153, + "step": 91, + "train_speed(iter/s)": 0.018827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 406.0, + "completions/mean_length": 289.0476379394531, + "completions/min_length": 206.0, + "entropy/max": 0.7890625, + "entropy/mean": 0.427734375, + "entropy/min": 0.21875, + "epoch": 0.092, + "grad_norm": 1.5615526688327124, + "kl": 0.12890625, + "learning_rate": 1.9663352688494683e-06, + "loss": 0.001303645665757358, + "memory(GiB)": 145.75, + "reward": 1.6100871562957764, + "reward_std": 0.3530694842338562, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3123398423194885, + "rewards/EvidenceHallucination/std": 0.3104865550994873, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.37020257115364075, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5476190447807312, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 92, + "train_speed(iter/s)": 0.018856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 612.0, + "completions/mean_length": 329.0476379394531, + "completions/min_length": 223.0, + "entropy/max": 1.28125, + "entropy/mean": 0.453125, + "entropy/min": 0.1181640625, + "epoch": 0.093, + "grad_norm": 1.4876802809944685, + "kl": 0.1171875, + "learning_rate": 1.965513952011551e-06, + "loss": 0.0012173369759693742, + "memory(GiB)": 145.75, + "reward": 1.6506892442703247, + "reward_std": 0.40816056728363037, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32043275237083435, + "rewards/EvidenceHallucination/std": 0.28490114212036133, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 1.0873574018478394, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5866028070449829, + "rewards/VideoAccuracy/std": 0.4674220085144043, + "step": 93, + "train_speed(iter/s)": 0.018846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 546.0, + "completions/mean_length": 328.9761962890625, + "completions/min_length": 244.0, + "entropy/max": 0.65625, + "entropy/mean": 0.361328125, + "entropy/min": 0.18359375, + "epoch": 0.094, + "grad_norm": 1.4128504167741638, + "kl": 0.1259765625, + "learning_rate": 1.964682912458856e-06, + "loss": 0.0012843573931604624, + "memory(GiB)": 145.75, + "reward": 1.8426321744918823, + "reward_std": 0.0740402340888977, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24410170316696167, + "rewards/EvidenceHallucination/std": 0.3090246617794037, + "rewards/Evidence_Num_Record/mean": 2.404762029647827, + "rewards/Evidence_Num_Record/std": 0.7669872641563416, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.660478413105011, + "rewards/VideoAccuracy/std": 0.47979769110679626, + "step": 94, + "train_speed(iter/s)": 0.018934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 901.0, + "completions/mean_length": 404.66668701171875, + "completions/min_length": 236.0, + "entropy/max": 0.921875, + "entropy/mean": 0.431640625, + "entropy/min": 0.181640625, + "epoch": 0.095, + "grad_norm": 1.3466708994180563, + "kl": 0.09619140625, + "learning_rate": 1.963842158559942e-06, + "loss": 0.0009734997292980552, + "memory(GiB)": 145.75, + "reward": 1.4714419841766357, + "reward_std": 0.33735716342926025, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16494199633598328, + "rewards/EvidenceHallucination/std": 0.2535810172557831, + "rewards/Evidence_Num_Record/mean": 2.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.8981204032897949, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3717869222164154, + "rewards/VideoAccuracy/std": 0.4193684756755829, + "step": 95, + "train_speed(iter/s)": 0.018945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 377.0, + "completions/mean_length": 280.9761962890625, + "completions/min_length": 216.0, + "entropy/max": 1.234375, + "entropy/mean": 0.421875, + "entropy/min": 0.2412109375, + "epoch": 0.096, + "grad_norm": 1.5754236514272237, + "kl": 0.1376953125, + "learning_rate": 1.9629916987811925e-06, + "loss": 0.0013955392641946673, + "memory(GiB)": 145.75, + "reward": 1.781163215637207, + "reward_std": 0.31508854031562805, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46638545393943787, + "rewards/EvidenceHallucination/std": 0.3576310873031616, + "rewards/Evidence_Num_Record/mean": 2.095238208770752, + "rewards/Evidence_Num_Record/std": 0.2971017360687256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6878859400749207, + "rewards/VideoAccuracy/std": 0.44374170899391174, + "step": 96, + "train_speed(iter/s)": 0.01895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 357.0, + "completions/mean_length": 293.8571472167969, + "completions/min_length": 232.0, + "entropy/max": 0.4609375, + "entropy/mean": 0.353515625, + "entropy/min": 0.2265625, + "epoch": 0.097, + "grad_norm": 1.4320296713190575, + "kl": 0.1318359375, + "learning_rate": 1.962131541686727e-06, + "loss": 0.0013267816975712776, + "memory(GiB)": 145.75, + "reward": 1.6482120752334595, + "reward_std": 0.2762901782989502, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30244576930999756, + "rewards/EvidenceHallucination/std": 0.36234188079833984, + "rewards/Evidence_Num_Record/mean": 2.0, + "rewards/Evidence_Num_Record/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5210562348365784, + "rewards/VideoAccuracy/std": 0.5029321312904358, + "step": 97, + "train_speed(iter/s)": 0.018976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 488.0, + "completions/mean_length": 371.5, + "completions/min_length": 233.0, + "entropy/max": 0.8515625, + "entropy/mean": 0.380859375, + "entropy/min": 0.197265625, + "epoch": 0.098, + "grad_norm": 1.1665879934566166, + "kl": 0.08154296875, + "learning_rate": 1.9612616959383188e-06, + "loss": 0.0008269266108982265, + "memory(GiB)": 145.75, + "reward": 1.9281727075576782, + "reward_std": 0.2275279015302658, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4922899603843689, + "rewards/EvidenceHallucination/std": 0.3693905770778656, + "rewards/Evidence_Num_Record/mean": 2.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.551631510257721, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6963813304901123, + "rewards/VideoAccuracy/std": 0.40303418040275574, + "step": 98, + "train_speed(iter/s)": 0.018996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 413.0, + "completions/mean_length": 289.1190490722656, + "completions/min_length": 225.0, + "entropy/max": 0.953125, + "entropy/mean": 0.486328125, + "entropy/min": 0.21484375, + "epoch": 0.099, + "grad_norm": 1.6602149895534124, + "kl": 0.1357421875, + "learning_rate": 1.9603821702953047e-06, + "loss": 0.0013623833656311035, + "memory(GiB)": 145.75, + "reward": 1.7348952293395996, + "reward_std": 0.3678227365016937, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3411424458026886, + "rewards/EvidenceHallucination/std": 0.30779772996902466, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 0.5054867267608643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 99, + "train_speed(iter/s)": 0.018977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 503.0, + "completions/mean_length": 317.0952453613281, + "completions/min_length": 182.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.373046875, + "entropy/min": 0.1357421875, + "epoch": 0.1, + "grad_norm": 1.532235925960191, + "kl": 0.134765625, + "learning_rate": 1.9594929736144973e-06, + "loss": 0.00138301239348948, + "memory(GiB)": 145.75, + "reward": 1.3657147884368896, + "reward_std": 0.398904412984848, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1937606930732727, + "rewards/EvidenceHallucination/std": 0.31757616996765137, + "rewards/Evidence_Num_Record/mean": 2.0238096714019775, + "rewards/Evidence_Num_Record/std": 0.26942533254623413, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3269627094268799, + "rewards/VideoAccuracy/std": 0.4119543433189392, + "step": 100, + "train_speed(iter/s)": 0.018944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/mean_length": 395.66668701171875, + "completions/min_length": 231.0, + "entropy/max": 0.734375, + "entropy/mean": 0.31640625, + "entropy/min": 0.1669921875, + "epoch": 0.101, + "grad_norm": 1.2899605030948045, + "kl": 0.083984375, + "learning_rate": 1.9585941148500986e-06, + "loss": 0.0008435548515990376, + "memory(GiB)": 145.75, + "reward": 1.9611115455627441, + "reward_std": 0.31510135531425476, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.401968777179718, + "rewards/EvidenceHallucination/std": 0.311238169670105, + "rewards/Evidence_Num_Record/mean": 2.690476179122925, + "rewards/Evidence_Num_Record/std": 0.7152722477912903, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6807177662849426, + "rewards/VideoAccuracy/std": 0.40623319149017334, + "step": 101, + "train_speed(iter/s)": 0.018779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 378.0, + "completions/mean_length": 284.8571472167969, + "completions/min_length": 183.0, + "entropy/max": 0.96875, + "entropy/mean": 0.447265625, + "entropy/min": 0.2392578125, + "epoch": 0.102, + "grad_norm": 1.6057589227526903, + "kl": 0.1298828125, + "learning_rate": 1.957685603053605e-06, + "loss": 0.0013163024559617043, + "memory(GiB)": 145.75, + "reward": 1.7291897535324097, + "reward_std": 0.3314557671546936, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.312615305185318, + "rewards/EvidenceHallucination/std": 0.27923011779785156, + "rewards/Evidence_Num_Record/mean": 2.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.34165000915527344, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 102, + "train_speed(iter/s)": 0.018807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 354.0, + "completions/mean_length": 282.8571472167969, + "completions/min_length": 195.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.373046875, + "entropy/min": 0.25390625, + "epoch": 0.103, + "grad_norm": 1.5676941891920446, + "kl": 0.1455078125, + "learning_rate": 1.9567674473737218e-06, + "loss": 0.0014565930468961596, + "memory(GiB)": 145.75, + "reward": 1.2863401174545288, + "reward_std": 0.42372652888298035, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.12458339333534241, + "rewards/EvidenceHallucination/std": 0.2610587179660797, + "rewards/Evidence_Num_Record/mean": 2.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.37719547748565674, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.26142337918281555, + "rewards/VideoAccuracy/std": 0.4441832900047302, + "step": 103, + "train_speed(iter/s)": 0.018817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 856.0, + "completions/mean_length": 360.452392578125, + "completions/min_length": 208.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.37109375, + "entropy/min": 0.203125, + "epoch": 0.104, + "grad_norm": 1.270247282735159, + "kl": 0.130859375, + "learning_rate": 1.955839657056265e-06, + "loss": 0.0015169496182352304, + "memory(GiB)": 145.75, + "reward": 1.7464238405227661, + "reward_std": 0.2219785451889038, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2563296854496002, + "rewards/EvidenceHallucination/std": 0.32491403818130493, + "rewards/Evidence_Num_Record/mean": 2.642857074737549, + "rewards/Evidence_Num_Record/std": 1.122310996055603, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5618245601654053, + "rewards/VideoAccuracy/std": 0.45603543519973755, + "step": 104, + "train_speed(iter/s)": 0.018801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 670.0, + "completions/mean_length": 392.5714416503906, + "completions/min_length": 229.0, + "entropy/max": 1.203125, + "entropy/mean": 0.408203125, + "entropy/min": 0.1689453125, + "epoch": 0.105, + "grad_norm": 1.5373718970893957, + "kl": 0.109375, + "learning_rate": 1.9549022414440736e-06, + "loss": 0.001107646618038416, + "memory(GiB)": 145.75, + "reward": 1.7364230155944824, + "reward_std": 0.3530835807323456, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3004482090473175, + "rewards/EvidenceHallucination/std": 0.33540382981300354, + "rewards/Evidence_Num_Record/mean": 2.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.7501451373100281, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6096665859222412, + "rewards/VideoAccuracy/std": 0.4684169292449951, + "step": 105, + "train_speed(iter/s)": 0.018819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 406.0, + "completions/mean_length": 297.8333435058594, + "completions/min_length": 167.0, + "entropy/max": 0.482421875, + "entropy/mean": 0.375, + "entropy/min": 0.2197265625, + "epoch": 0.106, + "grad_norm": 1.3914419307345625, + "kl": 0.1416015625, + "learning_rate": 1.9539552099769126e-06, + "loss": 0.0014185493346303701, + "memory(GiB)": 145.75, + "reward": 1.5386505126953125, + "reward_std": 0.34179314970970154, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31230056285858154, + "rewards/EvidenceHallucination/std": 0.3736018240451813, + "rewards/Evidence_Num_Record/mean": 2.2142858505249023, + "rewards/Evidence_Num_Record/std": 0.4152997136116028, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4761904776096344, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 106, + "train_speed(iter/s)": 0.018817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 469.0, + "completions/mean_length": 300.9285888671875, + "completions/min_length": 155.0, + "entropy/max": 0.609375, + "entropy/mean": 0.39453125, + "entropy/min": 0.15625, + "epoch": 0.107, + "grad_norm": 1.4888784408936901, + "kl": 0.1513671875, + "learning_rate": 1.952998572191378e-06, + "loss": 0.0015213524457067251, + "memory(GiB)": 145.75, + "reward": 1.6890357732772827, + "reward_std": 0.32763195037841797, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3215029239654541, + "rewards/EvidenceHallucination/std": 0.32102346420288086, + "rewards/Evidence_Num_Record/mean": 2.142857074737549, + "rewards/Evidence_Num_Record/std": 0.41739192605018616, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5580684542655945, + "rewards/VideoAccuracy/std": 0.4772607684135437, + "step": 107, + "train_speed(iter/s)": 0.018838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 745.0, + "completions/mean_length": 475.21429443359375, + "completions/min_length": 269.0, + "entropy/max": 1.15625, + "entropy/mean": 0.38671875, + "entropy/min": 0.162109375, + "epoch": 0.108, + "grad_norm": 1.267622073447083, + "kl": 0.07763671875, + "learning_rate": 1.9520323377208013e-06, + "loss": 0.000781947048380971, + "memory(GiB)": 145.75, + "reward": 1.8262310028076172, + "reward_std": 0.3916190266609192, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30679747462272644, + "rewards/EvidenceHallucination/std": 0.284414678812027, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6315380334854126, + "rewards/VideoAccuracy/std": 0.4059906601905823, + "step": 108, + "train_speed(iter/s)": 0.018845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 434.0, + "completions/mean_length": 310.73809814453125, + "completions/min_length": 185.0, + "entropy/max": 1.1953125, + "entropy/mean": 0.51171875, + "entropy/min": 0.25390625, + "epoch": 0.109, + "grad_norm": 1.6471346855993556, + "kl": 0.1357421875, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.0013572005555033684, + "memory(GiB)": 146.12, + "reward": 1.835160255432129, + "reward_std": 0.19685718417167664, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3662773370742798, + "rewards/EvidenceHallucination/std": 0.27799177169799805, + "rewards/Evidence_Num_Record/mean": 2.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.5077791810035706, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.761904776096344, + "rewards/VideoAccuracy/std": 0.43108054995536804, + "step": 109, + "train_speed(iter/s)": 0.018797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 427.0, + "completions/mean_length": 288.5476379394531, + "completions/min_length": 199.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.373046875, + "entropy/min": 0.2373046875, + "epoch": 0.11, + "grad_norm": 1.2249208193246408, + "kl": 0.142578125, + "learning_rate": 1.9500711177409454e-06, + "loss": 0.0014356218744069338, + "memory(GiB)": 146.12, + "reward": 1.105482578277588, + "reward_std": 0.1584109514951706, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.020045572891831398, + "rewards/EvidenceHallucination/std": 0.09083432704210281, + "rewards/Evidence_Num_Record/mean": 2.0238096714019775, + "rewards/Evidence_Num_Record/std": 0.4679011404514313, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.10147348046302795, + "rewards/VideoAccuracy/std": 0.26096680760383606, + "step": 110, + "train_speed(iter/s)": 0.01883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 684.0, + "completions/mean_length": 402.23809814453125, + "completions/min_length": 234.0, + "entropy/max": 0.458984375, + "entropy/mean": 0.28125, + "entropy/min": 0.146484375, + "epoch": 0.111, + "grad_norm": 1.1488735921623328, + "kl": 0.09912109375, + "learning_rate": 1.9490761519811294e-06, + "loss": 0.001193409669212997, + "memory(GiB)": 146.12, + "reward": 1.8218879699707031, + "reward_std": 0.2910378575325012, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24762538075447083, + "rewards/EvidenceHallucination/std": 0.36527809500694275, + "rewards/Evidence_Num_Record/mean": 2.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.5008703470230103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5723628997802734, + "rewards/VideoAccuracy/std": 0.46409302949905396, + "step": 111, + "train_speed(iter/s)": 0.018845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 438.0, + "completions/mean_length": 303.0476379394531, + "completions/min_length": 195.0, + "entropy/max": 1.2890625, + "entropy/mean": 0.54296875, + "entropy/min": 0.259765625, + "epoch": 0.112, + "grad_norm": 1.5008335928014824, + "kl": 0.1494140625, + "learning_rate": 1.9480716290349993e-06, + "loss": 0.0015141356270760298, + "memory(GiB)": 146.12, + "reward": 1.5550602674484253, + "reward_std": 0.2568727731704712, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2753015160560608, + "rewards/EvidenceHallucination/std": 0.3077669143676758, + "rewards/Evidence_Num_Record/mean": 2.238095283508301, + "rewards/Evidence_Num_Record/std": 0.5763435959815979, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5, + "rewards/VideoAccuracy/std": 0.5060608386993408, + "step": 112, + "train_speed(iter/s)": 0.018847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 494.0, + "completions/mean_length": 314.3095397949219, + "completions/min_length": 164.0, + "entropy/max": 1.4921875, + "entropy/mean": 0.458984375, + "entropy/min": 0.2412109375, + "epoch": 0.113, + "grad_norm": 1.5778138722270425, + "kl": 0.140625, + "learning_rate": 1.9470575590180908e-06, + "loss": 0.0014103710418567061, + "memory(GiB)": 146.12, + "reward": 1.460694432258606, + "reward_std": 0.29496198892593384, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21751172840595245, + "rewards/EvidenceHallucination/std": 0.32539689540863037, + "rewards/Evidence_Num_Record/mean": 2.261904716491699, + "rewards/Evidence_Num_Record/std": 0.5868279337882996, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4171919524669647, + "rewards/VideoAccuracy/std": 0.48727452754974365, + "step": 113, + "train_speed(iter/s)": 0.018868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 963.0, + "completions/mean_length": 375.3333435058594, + "completions/min_length": 212.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.353515625, + "entropy/min": 0.1650390625, + "epoch": 0.114, + "grad_norm": 1.3875680698005661, + "kl": 0.1337890625, + "learning_rate": 1.946033952142077e-06, + "loss": 0.0015402303542941809, + "memory(GiB)": 146.12, + "reward": 1.5177232027053833, + "reward_std": 0.12473516166210175, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18247689306735992, + "rewards/EvidenceHallucination/std": 0.3012517988681793, + "rewards/Evidence_Num_Record/mean": 2.8333334922790527, + "rewards/Evidence_Num_Record/std": 1.7379082441329956, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.34789445996284485, + "rewards/VideoAccuracy/std": 0.4493321180343628, + "step": 114, + "train_speed(iter/s)": 0.01893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2034.0, + "completions/mean_length": 454.23809814453125, + "completions/min_length": 208.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.4140625, + "entropy/min": 0.1494140625, + "epoch": 0.115, + "grad_norm": 1.0701861902019776, + "kl": 0.10400390625, + "learning_rate": 1.945000818714668e-06, + "loss": 0.001130782999098301, + "memory(GiB)": 146.12, + "reward": 1.4441184997558594, + "reward_std": 0.2652636468410492, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.148814857006073, + "rewards/EvidenceHallucination/std": 0.2521863281726837, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 5.08305025100708, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3476888835430145, + "rewards/VideoAccuracy/std": 0.43469253182411194, + "step": 115, + "train_speed(iter/s)": 0.018891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 420.0, + "completions/mean_length": 298.69049072265625, + "completions/min_length": 207.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.396484375, + "entropy/min": 0.2197265625, + "epoch": 0.116, + "grad_norm": 1.402990372757954, + "kl": 0.1533203125, + "learning_rate": 1.9439581691395065e-06, + "loss": 0.0015466272598132491, + "memory(GiB)": 146.12, + "reward": 1.3327633142471313, + "reward_std": 0.27393803000450134, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1695629358291626, + "rewards/EvidenceHallucination/std": 0.31487464904785156, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 0.45468270778656006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.29885080456733704, + "rewards/VideoAccuracy/std": 0.45135653018951416, + "step": 116, + "train_speed(iter/s)": 0.018918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 594.0, + "completions/mean_length": 319.26190185546875, + "completions/min_length": 209.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.375, + "entropy/min": 0.2177734375, + "epoch": 0.117, + "grad_norm": 1.1686409064284398, + "kl": 0.1455078125, + "learning_rate": 1.9429060139160616e-06, + "loss": 0.0014689366798847914, + "memory(GiB)": 146.12, + "reward": 1.408905029296875, + "reward_std": 0.11996100842952728, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2803170084953308, + "rewards/EvidenceHallucination/std": 0.41000500321388245, + "rewards/Evidence_Num_Record/mean": 2.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.45722994208335876, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.286175012588501, + "rewards/VideoAccuracy/std": 0.4182421863079071, + "step": 117, + "train_speed(iter/s)": 0.018936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 840.0, + "completions/mean_length": 437.90478515625, + "completions/min_length": 217.0, + "entropy/max": 0.80078125, + "entropy/mean": 0.35546875, + "entropy/min": 0.154296875, + "epoch": 0.118, + "grad_norm": 1.3390124812681243, + "kl": 0.09521484375, + "learning_rate": 1.9418443636395246e-06, + "loss": 0.0009612910216674209, + "memory(GiB)": 146.12, + "reward": 1.8514626026153564, + "reward_std": 0.32283729314804077, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31857970356941223, + "rewards/EvidenceHallucination/std": 0.3661547005176544, + "rewards/Evidence_Num_Record/mean": 2.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7066960334777832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6544132828712463, + "rewards/VideoAccuracy/std": 0.482687383890152, + "step": 118, + "train_speed(iter/s)": 0.018925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 530.0, + "completions/mean_length": 289.8571472167969, + "completions/min_length": 168.0, + "entropy/max": 1.0, + "entropy/mean": 0.466796875, + "entropy/min": 0.166015625, + "epoch": 0.119, + "grad_norm": 1.8322000879365274, + "kl": 0.14453125, + "learning_rate": 1.940773229000702e-06, + "loss": 0.0014447685098275542, + "memory(GiB)": 146.12, + "reward": 1.6284502744674683, + "reward_std": 0.4649772346019745, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2851080298423767, + "rewards/EvidenceHallucination/std": 0.2923296093940735, + "rewards/Evidence_Num_Record/mean": 2.261904716491699, + "rewards/Evidence_Num_Record/std": 0.6647766828536987, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5714285969734192, + "rewards/VideoAccuracy/std": 0.5008702874183655, + "step": 119, + "train_speed(iter/s)": 0.018928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 566.0, + "completions/mean_length": 320.8809509277344, + "completions/min_length": 209.0, + "entropy/max": 0.83203125, + "entropy/mean": 0.41796875, + "entropy/min": 0.2392578125, + "epoch": 0.12, + "grad_norm": 1.2953318223478445, + "kl": 0.1396484375, + "learning_rate": 1.9396926207859082e-06, + "loss": 0.0013921773061156273, + "memory(GiB)": 146.12, + "reward": 1.2641671895980835, + "reward_std": 0.3343121409416199, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13318182528018951, + "rewards/EvidenceHallucination/std": 0.26720312237739563, + "rewards/Evidence_Num_Record/mean": 2.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.23753078281879425, + "rewards/VideoAccuracy/std": 0.430060476064682, + "step": 120, + "train_speed(iter/s)": 0.018922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 639.0, + "completions/mean_length": 446.40478515625, + "completions/min_length": 211.0, + "entropy/max": 0.5, + "entropy/mean": 0.310546875, + "entropy/min": 0.1923828125, + "epoch": 0.121, + "grad_norm": 1.120987905866189, + "kl": 0.09814453125, + "learning_rate": 1.9386025498768555e-06, + "loss": 0.0011982453288510442, + "memory(GiB)": 146.12, + "reward": 2.073885917663574, + "reward_std": 0.15345653891563416, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42954981327056885, + "rewards/EvidenceHallucination/std": 0.38178956508636475, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.7344991564750671, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7879758477210999, + "rewards/VideoAccuracy/std": 0.43794405460357666, + "step": 121, + "train_speed(iter/s)": 0.018874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 550.0, + "completions/mean_length": 369.452392578125, + "completions/min_length": 261.0, + "entropy/max": 1.3359375, + "entropy/mean": 0.53125, + "entropy/min": 0.30078125, + "epoch": 0.122, + "grad_norm": 1.516907029994994, + "kl": 0.1396484375, + "learning_rate": 1.937503027250546e-06, + "loss": 0.0014094719663262367, + "memory(GiB)": 146.12, + "reward": 1.7915791273117065, + "reward_std": 0.22216013073921204, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.445990651845932, + "rewards/EvidenceHallucination/std": 0.32774993777275085, + "rewards/Evidence_Num_Record/mean": 2.761904716491699, + "rewards/Evidence_Num_Record/std": 0.8499504923820496, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7142857313156128, + "rewards/VideoAccuracy/std": 0.45722997188568115, + "step": 122, + "train_speed(iter/s)": 0.018879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 497.0, + "completions/mean_length": 336.952392578125, + "completions/min_length": 206.0, + "entropy/max": 0.73046875, + "entropy/mean": 0.4140625, + "entropy/min": 0.2333984375, + "epoch": 0.123, + "grad_norm": 1.6973598296429866, + "kl": 0.1455078125, + "learning_rate": 1.9363940639791603e-06, + "loss": 0.001456602243706584, + "memory(GiB)": 146.12, + "reward": 1.4033061265945435, + "reward_std": 0.44239187240600586, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18754583597183228, + "rewards/EvidenceHallucination/std": 0.31474003195762634, + "rewards/Evidence_Num_Record/mean": 2.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.6712963581085205, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.36579692363739014, + "rewards/VideoAccuracy/std": 0.4749639332294464, + "step": 123, + "train_speed(iter/s)": 0.018884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 661.0, + "completions/mean_length": 399.3095397949219, + "completions/min_length": 250.0, + "entropy/max": 0.5078125, + "entropy/mean": 0.37109375, + "entropy/min": 0.1845703125, + "epoch": 0.124, + "grad_norm": 1.407556389216323, + "kl": 0.1328125, + "learning_rate": 1.9352756712299464e-06, + "loss": 0.0013447859091684222, + "memory(GiB)": 146.12, + "reward": 1.5876822471618652, + "reward_std": 0.318477988243103, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19310377538204193, + "rewards/EvidenceHallucination/std": 0.3203859031200409, + "rewards/Evidence_Num_Record/mean": 2.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.9444501399993896, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4157281816005707, + "rewards/VideoAccuracy/std": 0.4557120203971863, + "step": 124, + "train_speed(iter/s)": 0.018918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 830.0, + "completions/mean_length": 471.9761962890625, + "completions/min_length": 222.0, + "entropy/max": 1.7265625, + "entropy/mean": 0.55078125, + "entropy/min": 0.1865234375, + "epoch": 0.125, + "grad_norm": 1.4369879104798542, + "kl": 0.115234375, + "learning_rate": 1.9341478602651067e-06, + "loss": 0.0011676698923110962, + "memory(GiB)": 146.12, + "reward": 1.4673373699188232, + "reward_std": 0.3999050557613373, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1500246524810791, + "rewards/EvidenceHallucination/std": 0.2782667279243469, + "rewards/Evidence_Num_Record/mean": 3.047619104385376, + "rewards/Evidence_Num_Record/std": 1.0109734535217285, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.3706657886505127, + "rewards/VideoAccuracy/std": 0.4459346830844879, + "step": 125, + "train_speed(iter/s)": 0.018913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 570.0, + "completions/mean_length": 332.76190185546875, + "completions/min_length": 198.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.400390625, + "entropy/min": 0.23046875, + "epoch": 0.126, + "grad_norm": 1.5109059253680701, + "kl": 0.1552734375, + "learning_rate": 1.933010642441685e-06, + "loss": 0.0015631616115570068, + "memory(GiB)": 146.12, + "reward": 1.7317975759506226, + "reward_std": 0.3719199299812317, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3935978412628174, + "rewards/EvidenceHallucination/std": 0.3575633764266968, + "rewards/Evidence_Num_Record/mean": 2.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6530779004096985, + "rewards/VideoAccuracy/std": 0.471064954996109, + "step": 126, + "train_speed(iter/s)": 0.018898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 432.0, + "completions/mean_length": 295.3095397949219, + "completions/min_length": 199.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.380859375, + "entropy/min": 0.150390625, + "epoch": 0.127, + "grad_norm": 1.2879849893868394, + "kl": 0.1591796875, + "learning_rate": 1.9318640292114524e-06, + "loss": 0.0018035446992143989, + "memory(GiB)": 146.12, + "reward": 1.565708041191101, + "reward_std": 0.11706583946943283, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28319069743156433, + "rewards/EvidenceHallucination/std": 0.3761787414550781, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 0.5054867267608643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4424029290676117, + "rewards/VideoAccuracy/std": 0.4586055278778076, + "step": 127, + "train_speed(iter/s)": 0.01891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 722.0, + "completions/mean_length": 483.8571472167969, + "completions/min_length": 227.0, + "entropy/max": 0.96484375, + "entropy/mean": 0.392578125, + "entropy/min": 0.169921875, + "epoch": 0.128, + "grad_norm": 1.2421907043489904, + "kl": 0.10107421875, + "learning_rate": 1.930708032120791e-06, + "loss": 0.0010285605676472187, + "memory(GiB)": 146.12, + "reward": 1.8038581609725952, + "reward_std": 0.32695096731185913, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3501226305961609, + "rewards/EvidenceHallucination/std": 0.3542228043079376, + "rewards/Evidence_Num_Record/mean": 3.2142858505249023, + "rewards/Evidence_Num_Record/std": 0.9761975407600403, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.600500226020813, + "rewards/VideoAccuracy/std": 0.43714261054992676, + "step": 128, + "train_speed(iter/s)": 0.018909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 617.0, + "completions/mean_length": 401.5, + "completions/min_length": 258.0, + "entropy/max": 0.88671875, + "entropy/mean": 0.466796875, + "entropy/min": 0.26953125, + "epoch": 0.129, + "grad_norm": 1.4672836541554346, + "kl": 0.1474609375, + "learning_rate": 1.929542662810579e-06, + "loss": 0.0014828164130449295, + "memory(GiB)": 146.12, + "reward": 1.5887269973754883, + "reward_std": 0.4017311930656433, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32458657026290894, + "rewards/EvidenceHallucination/std": 0.33995798230171204, + "rewards/Evidence_Num_Record/mean": 2.857142925262451, + "rewards/Evidence_Num_Record/std": 0.6833000779151917, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.523809552192688, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 129, + "train_speed(iter/s)": 0.018908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 409.0, + "completions/mean_length": 297.21429443359375, + "completions/min_length": 223.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.365234375, + "entropy/min": 0.2177734375, + "epoch": 0.13, + "grad_norm": 1.3237343949509217, + "kl": 0.1650390625, + "learning_rate": 1.9283679330160725e-06, + "loss": 0.001645385636948049, + "memory(GiB)": 146.12, + "reward": 1.3768378496170044, + "reward_std": 0.21993836760520935, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1668916642665863, + "rewards/EvidenceHallucination/std": 0.27208632230758667, + "rewards/Evidence_Num_Record/mean": 2.309523820877075, + "rewards/Evidence_Num_Record/std": 0.5174088478088379, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3434593677520752, + "rewards/VideoAccuracy/std": 0.4569014608860016, + "step": 130, + "train_speed(iter/s)": 0.018925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 791.0, + "completions/mean_length": 509.9761962890625, + "completions/min_length": 288.0, + "entropy/max": 0.890625, + "entropy/mean": 0.33203125, + "entropy/min": 0.1142578125, + "epoch": 0.131, + "grad_norm": 0.9172565634820042, + "kl": 0.10009765625, + "learning_rate": 1.9271838545667875e-06, + "loss": 0.0014220774173736572, + "memory(GiB)": 146.12, + "reward": 1.824385166168213, + "reward_std": 0.15645675361156464, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.295434832572937, + "rewards/EvidenceHallucination/std": 0.3719170093536377, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.7419721484184265, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.565298318862915, + "rewards/VideoAccuracy/std": 0.5190615057945251, + "step": 131, + "train_speed(iter/s)": 0.018919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 967.0, + "completions/mean_length": 436.0238037109375, + "completions/min_length": 213.0, + "entropy/max": 1.4765625, + "entropy/mean": 0.53515625, + "entropy/min": 0.25390625, + "epoch": 0.132, + "grad_norm": 1.272509803292311, + "kl": 0.1494140625, + "learning_rate": 1.92599043938638e-06, + "loss": 0.0015272954478859901, + "memory(GiB)": 146.12, + "reward": 1.377498984336853, + "reward_std": 0.3350825309753418, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2208285629749298, + "rewards/EvidenceHallucination/std": 0.3242204487323761, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 1.4323700666427612, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 132, + "train_speed(iter/s)": 0.018882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 476.0, + "completions/mean_length": 312.1190490722656, + "completions/min_length": 199.0, + "entropy/max": 1.03125, + "entropy/mean": 0.447265625, + "entropy/min": 0.2421875, + "epoch": 0.133, + "grad_norm": 1.251189024443508, + "kl": 0.1640625, + "learning_rate": 1.924787699492529e-06, + "loss": 0.0016607262659817934, + "memory(GiB)": 146.12, + "reward": 1.2626943588256836, + "reward_std": 0.12782520055770874, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11290771514177322, + "rewards/EvidenceHallucination/std": 0.2381616085767746, + "rewards/Evidence_Num_Record/mean": 2.190476179122925, + "rewards/Evidence_Num_Record/std": 0.6712963581085205, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.240112766623497, + "rewards/VideoAccuracy/std": 0.40528300404548645, + "step": 133, + "train_speed(iter/s)": 0.018906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/mean_length": 407.9761962890625, + "completions/min_length": 276.0, + "entropy/max": 0.87890625, + "entropy/mean": 0.4140625, + "entropy/min": 0.181640625, + "epoch": 0.134, + "grad_norm": 1.4381078327036072, + "kl": 0.1474609375, + "learning_rate": 1.923575646996811e-06, + "loss": 0.0014882514951750636, + "memory(GiB)": 146.12, + "reward": 1.883029818534851, + "reward_std": 0.2080840915441513, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22200721502304077, + "rewards/EvidenceHallucination/std": 0.33083653450012207, + "rewards/Evidence_Num_Record/mean": 2.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.547404408454895, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7052949070930481, + "rewards/VideoAccuracy/std": 0.469309002161026, + "step": 134, + "train_speed(iter/s)": 0.018913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 746.0, + "completions/mean_length": 506.73809814453125, + "completions/min_length": 281.0, + "entropy/max": 1.75, + "entropy/mean": 0.57421875, + "entropy/min": 0.11767578125, + "epoch": 0.135, + "grad_norm": 1.2160665784268834, + "kl": 0.12353515625, + "learning_rate": 1.9223542941045815e-06, + "loss": 0.0012557308655232191, + "memory(GiB)": 146.12, + "reward": 1.589491367340088, + "reward_std": 0.43333733081817627, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2434537559747696, + "rewards/EvidenceHallucination/std": 0.32277417182922363, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.8050364255905151, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.47413405776023865, + "rewards/VideoAccuracy/std": 0.44823360443115234, + "step": 135, + "train_speed(iter/s)": 0.018885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 550.0, + "completions/mean_length": 361.6190490722656, + "completions/min_length": 225.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.43359375, + "entropy/min": 0.224609375, + "epoch": 0.136, + "grad_norm": 1.4554467565915103, + "kl": 0.171875, + "learning_rate": 1.92112365311485e-06, + "loss": 0.0017367280088365078, + "memory(GiB)": 146.12, + "reward": 1.5271995067596436, + "reward_std": 0.2954389452934265, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23232774436473846, + "rewards/EvidenceHallucination/std": 0.28536492586135864, + "rewards/Evidence_Num_Record/mean": 2.595238208770752, + "rewards/Evidence_Num_Record/std": 0.6270147562026978, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4807339608669281, + "rewards/VideoAccuracy/std": 0.5015210509300232, + "step": 136, + "train_speed(iter/s)": 0.018897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/mean_length": 400.16668701171875, + "completions/min_length": 260.0, + "entropy/max": 0.515625, + "entropy/mean": 0.392578125, + "entropy/min": 0.2392578125, + "epoch": 0.137, + "grad_norm": 1.463680950683056, + "kl": 0.17578125, + "learning_rate": 1.9198837364201583e-06, + "loss": 0.0017780549824237823, + "memory(GiB)": 146.12, + "reward": 1.5801057815551758, + "reward_std": 0.18565520644187927, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31667351722717285, + "rewards/EvidenceHallucination/std": 0.40294042229652405, + "rewards/Evidence_Num_Record/mean": 2.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7726449966430664, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.45010441541671753, + "rewards/VideoAccuracy/std": 0.4757658541202545, + "step": 137, + "train_speed(iter/s)": 0.018911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1106.0, + "completions/mean_length": 573.2142944335938, + "completions/min_length": 303.0, + "entropy/max": 1.328125, + "entropy/mean": 0.447265625, + "entropy/min": 0.115234375, + "epoch": 0.138, + "grad_norm": 1.3228258962917756, + "kl": 0.10791015625, + "learning_rate": 1.9186345565064534e-06, + "loss": 0.0010970378061756492, + "memory(GiB)": 146.12, + "reward": 1.7937687635421753, + "reward_std": 0.3511067032814026, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.269847571849823, + "rewards/EvidenceHallucination/std": 0.3559637665748596, + "rewards/Evidence_Num_Record/mean": 3.309523820877075, + "rewards/Evidence_Num_Record/std": 0.7804968953132629, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6112276911735535, + "rewards/VideoAccuracy/std": 0.42789316177368164, + "step": 138, + "train_speed(iter/s)": 0.018887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 668.0, + "completions/mean_length": 409.4285888671875, + "completions/min_length": 236.0, + "entropy/max": 0.9765625, + "entropy/mean": 0.451171875, + "entropy/min": 0.2177734375, + "epoch": 0.139, + "grad_norm": 1.370286784743664, + "kl": 0.171875, + "learning_rate": 1.9173761259529635e-06, + "loss": 0.0017487120348960161, + "memory(GiB)": 146.12, + "reward": 1.6621686220169067, + "reward_std": 0.28146690130233765, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33465176820755005, + "rewards/EvidenceHallucination/std": 0.32503581047058105, + "rewards/Evidence_Num_Record/mean": 2.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7066960334777832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 139, + "train_speed(iter/s)": 0.018906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 597.0, + "completions/mean_length": 384.5, + "completions/min_length": 267.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.3984375, + "entropy/min": 0.251953125, + "epoch": 0.14, + "grad_norm": 1.421840708779392, + "kl": 0.1748046875, + "learning_rate": 1.9161084574320692e-06, + "loss": 0.0017522408161312342, + "memory(GiB)": 146.12, + "reward": 1.4421076774597168, + "reward_std": 0.21170346438884735, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28149551153182983, + "rewards/EvidenceHallucination/std": 0.3998582363128662, + "rewards/Evidence_Num_Record/mean": 2.857142925262451, + "rewards/Evidence_Num_Record/std": 0.6466208100318909, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3858085870742798, + "rewards/VideoAccuracy/std": 0.46080002188682556, + "step": 140, + "train_speed(iter/s)": 0.018897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 967.0, + "completions/mean_length": 508.1428527832031, + "completions/min_length": 250.0, + "entropy/max": 0.8671875, + "entropy/mean": 0.33984375, + "entropy/min": 0.11572265625, + "epoch": 0.141, + "grad_norm": 0.9979565047966674, + "kl": 0.11572265625, + "learning_rate": 1.91483156370918e-06, + "loss": 0.0013793944381177425, + "memory(GiB)": 146.12, + "reward": 2.0592873096466064, + "reward_std": 0.10511618852615356, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4793519973754883, + "rewards/EvidenceHallucination/std": 0.37820374965667725, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.9385906457901001, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7634169459342957, + "rewards/VideoAccuracy/std": 0.3961738646030426, + "step": 141, + "train_speed(iter/s)": 0.018916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 745.0, + "completions/mean_length": 470.90478515625, + "completions/min_length": 286.0, + "entropy/max": 1.7265625, + "entropy/mean": 0.578125, + "entropy/min": 0.259765625, + "epoch": 0.142, + "grad_norm": 1.2861904590160196, + "kl": 0.16796875, + "learning_rate": 1.9135454576426007e-06, + "loss": 0.0017059104284271598, + "memory(GiB)": 146.12, + "reward": 1.5634406805038452, + "reward_std": 0.34225767850875854, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3172035217285156, + "rewards/EvidenceHallucination/std": 0.3703348636627197, + "rewards/Evidence_Num_Record/mean": 3.0, + "rewards/Evidence_Num_Record/std": 0.7963330745697021, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5, + "rewards/VideoAccuracy/std": 0.5060608386993408, + "step": 142, + "train_speed(iter/s)": 0.018901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 536.0, + "completions/mean_length": 395.9047546386719, + "completions/min_length": 265.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.388671875, + "entropy/min": 0.2265625, + "epoch": 0.143, + "grad_norm": 1.304361941814836, + "kl": 0.1533203125, + "learning_rate": 1.912250152183405e-06, + "loss": 0.0015516172861680388, + "memory(GiB)": 146.12, + "reward": 1.2907675504684448, + "reward_std": 0.37060222029685974, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14767104387283325, + "rewards/EvidenceHallucination/std": 0.3083168566226959, + "rewards/Evidence_Num_Record/mean": 2.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.1788398027420044, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.261233389377594, + "rewards/VideoAccuracy/std": 0.44386783242225647, + "step": 143, + "train_speed(iter/s)": 0.01892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/mean_length": 545.5, + "completions/min_length": 242.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.37890625, + "entropy/min": 0.2021484375, + "epoch": 0.144, + "grad_norm": 1.095160698262503, + "kl": 0.1591796875, + "learning_rate": 1.910945660375305e-06, + "loss": 0.0016175673808902502, + "memory(GiB)": 146.12, + "reward": 1.6900311708450317, + "reward_std": 0.21441495418548584, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3284996449947357, + "rewards/EvidenceHallucination/std": 0.3852420151233673, + "rewards/Evidence_Num_Record/mean": 2.952380895614624, + "rewards/Evidence_Num_Record/std": 0.961512565612793, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4909977316856384, + "rewards/VideoAccuracy/std": 0.4495478868484497, + "step": 144, + "train_speed(iter/s)": 0.018899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 642.047607421875, + "completions/min_length": 350.0, + "entropy/max": 1.9140625, + "entropy/mean": 0.45703125, + "entropy/min": 0.04736328125, + "epoch": 0.145, + "grad_norm": 1.6791630968254418, + "kl": 0.2080078125, + "learning_rate": 1.9096319953545185e-06, + "loss": 0.0015657602343708277, + "memory(GiB)": 146.12, + "reward": 1.7518750429153442, + "reward_std": 0.31135857105255127, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3737892210483551, + "rewards/EvidenceHallucination/std": 0.3780079483985901, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.9891983270645142, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6223553419113159, + "rewards/VideoAccuracy/std": 0.4546234607696533, + "step": 145, + "train_speed(iter/s)": 0.018848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 558.0, + "completions/mean_length": 372.26190185546875, + "completions/min_length": 224.0, + "entropy/max": 0.77734375, + "entropy/mean": 0.41015625, + "entropy/min": 0.21875, + "epoch": 0.146, + "grad_norm": 1.3850014837970217, + "kl": 0.1982421875, + "learning_rate": 1.908309170349637e-06, + "loss": 0.0019767656922340393, + "memory(GiB)": 146.12, + "reward": 1.623852014541626, + "reward_std": 0.3224193751811981, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3113335371017456, + "rewards/EvidenceHallucination/std": 0.3489803671836853, + "rewards/Evidence_Num_Record/mean": 2.690476179122925, + "rewards/Evidence_Num_Record/std": 0.6803189516067505, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5615853071212769, + "rewards/VideoAccuracy/std": 0.4945571720600128, + "step": 146, + "train_speed(iter/s)": 0.018855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 688.0, + "completions/mean_length": 394.6428527832031, + "completions/min_length": 251.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.396484375, + "entropy/min": 0.2080078125, + "epoch": 0.147, + "grad_norm": 1.246688801187698, + "kl": 0.17578125, + "learning_rate": 1.9069771986814948e-06, + "loss": 0.001971013844013214, + "memory(GiB)": 146.12, + "reward": 1.4567680358886719, + "reward_std": 0.1992015838623047, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21758872270584106, + "rewards/EvidenceHallucination/std": 0.3374274969100952, + "rewards/Evidence_Num_Record/mean": 2.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7741467356681824, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.34658363461494446, + "rewards/VideoAccuracy/std": 0.4328537881374359, + "step": 147, + "train_speed(iter/s)": 0.018876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1137.0, + "completions/mean_length": 662.4285888671875, + "completions/min_length": 333.0, + "entropy/max": 1.453125, + "entropy/mean": 0.35546875, + "entropy/min": 0.1220703125, + "epoch": 0.148, + "grad_norm": 1.0689311318311303, + "kl": 0.103515625, + "learning_rate": 1.9056360937630308e-06, + "loss": 0.001046686084009707, + "memory(GiB)": 146.12, + "reward": 1.8244465589523315, + "reward_std": 0.2906140685081482, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.382415235042572, + "rewards/EvidenceHallucination/std": 0.3936961889266968, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.3580747842788696, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6193921566009521, + "rewards/VideoAccuracy/std": 0.5261656045913696, + "step": 148, + "train_speed(iter/s)": 0.018862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 766.0, + "completions/mean_length": 440.69049072265625, + "completions/min_length": 274.0, + "entropy/max": 0.9375, + "entropy/mean": 0.462890625, + "entropy/min": 0.328125, + "epoch": 0.149, + "grad_norm": 1.4597438882707154, + "kl": 0.1767578125, + "learning_rate": 1.9042858690991573e-06, + "loss": 0.0017879819497466087, + "memory(GiB)": 146.12, + "reward": 1.8150097131729126, + "reward_std": 0.3396349847316742, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3845720887184143, + "rewards/EvidenceHallucination/std": 0.3300588130950928, + "rewards/Evidence_Num_Record/mean": 2.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.8406761288642883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.738095223903656, + "rewards/VideoAccuracy/std": 0.44500064849853516, + "step": 149, + "train_speed(iter/s)": 0.018835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 500.0, + "completions/mean_length": 376.73809814453125, + "completions/min_length": 270.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.431640625, + "entropy/min": 0.298828125, + "epoch": 0.15, + "grad_norm": 1.4397330213071107, + "kl": 0.203125, + "learning_rate": 1.9029265382866213e-06, + "loss": 0.00204599485732615, + "memory(GiB)": 146.12, + "reward": 1.2717986106872559, + "reward_std": 0.3696291446685791, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08266858756542206, + "rewards/EvidenceHallucination/std": 0.22534288465976715, + "rewards/Evidence_Num_Record/mean": 2.547619104385376, + "rewards/Evidence_Num_Record/std": 0.66999751329422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2552648186683655, + "rewards/VideoAccuracy/std": 0.37430235743522644, + "step": 150, + "train_speed(iter/s)": 0.018849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 975.0, + "completions/mean_length": 588.8333740234375, + "completions/min_length": 330.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.34765625, + "entropy/min": 0.10498046875, + "epoch": 0.151, + "grad_norm": 1.2760505828928683, + "kl": 0.11474609375, + "learning_rate": 1.901558115013869e-06, + "loss": 0.0011610446963459253, + "memory(GiB)": 146.12, + "reward": 2.082760810852051, + "reward_std": 0.2610321044921875, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4390549659729004, + "rewards/EvidenceHallucination/std": 0.35406696796417236, + "rewards/Evidence_Num_Record/mean": 3.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.6768959760665894, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.7997115850448608, + "rewards/VideoAccuracy/std": 0.4025106430053711, + "step": 151, + "train_speed(iter/s)": 0.018839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 850.0, + "completions/mean_length": 493.19049072265625, + "completions/min_length": 322.0, + "entropy/max": 1.1484375, + "entropy/mean": 0.5390625, + "entropy/min": 0.265625, + "epoch": 0.152, + "grad_norm": 1.3738644394782378, + "kl": 0.1865234375, + "learning_rate": 1.9001806130609077e-06, + "loss": 0.0018724360270425677, + "memory(GiB)": 146.12, + "reward": 1.5372685194015503, + "reward_std": 0.45580458641052246, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3053901493549347, + "rewards/EvidenceHallucination/std": 0.3542548418045044, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.7696326971054077, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4761904776096344, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 152, + "train_speed(iter/s)": 0.01885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 592.0, + "completions/mean_length": 395.5714416503906, + "completions/min_length": 253.0, + "entropy/max": 0.9140625, + "entropy/mean": 0.455078125, + "entropy/min": 0.2490234375, + "epoch": 0.153, + "grad_norm": 1.2599000503893267, + "kl": 0.1953125, + "learning_rate": 1.8987940462991669e-06, + "loss": 0.0019733128137886524, + "memory(GiB)": 146.12, + "reward": 1.5792686939239502, + "reward_std": 0.20565973222255707, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3271608352661133, + "rewards/EvidenceHallucination/std": 0.3908017575740814, + "rewards/Evidence_Num_Record/mean": 2.809523820877075, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5138365626335144, + "rewards/VideoAccuracy/std": 0.47636887431144714, + "step": 153, + "train_speed(iter/s)": 0.01886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1124.0, + "completions/mean_length": 494.952392578125, + "completions/min_length": 242.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.365234375, + "entropy/min": 0.171875, + "epoch": 0.154, + "grad_norm": 1.375291943581688, + "kl": 0.1650390625, + "learning_rate": 1.8973984286913583e-06, + "loss": 0.0018742814427241683, + "memory(GiB)": 146.12, + "reward": 1.7530419826507568, + "reward_std": 0.14951765537261963, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.324443519115448, + "rewards/EvidenceHallucination/std": 0.3472951352596283, + "rewards/Evidence_Num_Record/mean": 2.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.7297399044036865, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5548199415206909, + "rewards/VideoAccuracy/std": 0.4582982361316681, + "step": 154, + "train_speed(iter/s)": 0.018859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 909.0, + "completions/mean_length": 511.76190185546875, + "completions/min_length": 252.0, + "entropy/max": 1.3203125, + "entropy/mean": 0.50390625, + "entropy/min": 0.1259765625, + "epoch": 0.155, + "grad_norm": 1.2741065524336352, + "kl": 0.154296875, + "learning_rate": 1.8959937742913357e-06, + "loss": 0.0015646511455997825, + "memory(GiB)": 146.12, + "reward": 1.5881472826004028, + "reward_std": 0.31238678097724915, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2351822406053543, + "rewards/EvidenceHallucination/std": 0.38644734025001526, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 1.0873574018478394, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.47444427013397217, + "rewards/VideoAccuracy/std": 0.5682789087295532, + "step": 155, + "train_speed(iter/s)": 0.018856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 526.0, + "completions/mean_length": 366.1190490722656, + "completions/min_length": 264.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.390625, + "entropy/min": 0.291015625, + "epoch": 0.156, + "grad_norm": 1.6022103852657883, + "kl": 0.2021484375, + "learning_rate": 1.8945800972439537e-06, + "loss": 0.0020145419985055923, + "memory(GiB)": 146.12, + "reward": 1.6180630922317505, + "reward_std": 0.3773413300514221, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3580820858478546, + "rewards/EvidenceHallucination/std": 0.3930751085281372, + "rewards/Evidence_Num_Record/mean": 2.690476179122925, + "rewards/Evidence_Num_Record/std": 0.7152722477912903, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.546446681022644, + "rewards/VideoAccuracy/std": 0.5027092099189758, + "step": 156, + "train_speed(iter/s)": 0.018853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 569.0, + "completions/mean_length": 389.9047546386719, + "completions/min_length": 246.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.423828125, + "entropy/min": 0.318359375, + "epoch": 0.157, + "grad_norm": 1.4581570659736076, + "kl": 0.19140625, + "learning_rate": 1.8931574117849238e-06, + "loss": 0.0019151073647662997, + "memory(GiB)": 146.12, + "reward": 1.7230446338653564, + "reward_std": 0.22620552778244019, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3397175073623657, + "rewards/EvidenceHallucination/std": 0.3540821075439453, + "rewards/Evidence_Num_Record/mean": 2.761904716491699, + "rewards/Evidence_Num_Record/std": 0.6172133684158325, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5884343981742859, + "rewards/VideoAccuracy/std": 0.47797274589538574, + "step": 157, + "train_speed(iter/s)": 0.018866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1030.0, + "completions/mean_length": 554.0238037109375, + "completions/min_length": 305.0, + "entropy/max": 1.3046875, + "entropy/mean": 0.375, + "entropy/min": 0.12451171875, + "epoch": 0.158, + "grad_norm": 1.0588912913862258, + "kl": 0.12109375, + "learning_rate": 1.8917257322406732e-06, + "loss": 0.0014288020320236683, + "memory(GiB)": 146.12, + "reward": 1.8552395105361938, + "reward_std": 0.1809217780828476, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2964218258857727, + "rewards/EvidenceHallucination/std": 0.3188343048095703, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.662621796131134, + "rewards/VideoAccuracy/std": 0.40555980801582336, + "step": 158, + "train_speed(iter/s)": 0.018863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1177.0, + "completions/mean_length": 476.4285888671875, + "completions/min_length": 277.0, + "entropy/max": 2.0, + "entropy/mean": 0.54296875, + "entropy/min": 0.28515625, + "epoch": 0.159, + "grad_norm": 1.4483058805078088, + "kl": 0.1953125, + "learning_rate": 1.8902850730281989e-06, + "loss": 0.001996553037315607, + "memory(GiB)": 146.12, + "reward": 1.9349104166030884, + "reward_std": 0.29467448592185974, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5078843235969543, + "rewards/EvidenceHallucination/std": 0.3710263967514038, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 1.605196237564087, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8333333730697632, + "rewards/VideoAccuracy/std": 0.37719547748565674, + "step": 159, + "train_speed(iter/s)": 0.018853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 516.0, + "completions/mean_length": 391.5, + "completions/min_length": 244.0, + "entropy/max": 0.78515625, + "entropy/mean": 0.416015625, + "entropy/min": 0.2333984375, + "epoch": 0.16, + "grad_norm": 1.3757762186406606, + "kl": 0.21484375, + "learning_rate": 1.8888354486549234e-06, + "loss": 0.0021661133505403996, + "memory(GiB)": 146.12, + "reward": 1.4399813413619995, + "reward_std": 0.225159153342247, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23081883788108826, + "rewards/EvidenceHallucination/std": 0.34395676851272583, + "rewards/Evidence_Num_Record/mean": 2.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.7120173573493958, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.39381763339042664, + "rewards/VideoAccuracy/std": 0.4535292387008667, + "step": 160, + "train_speed(iter/s)": 0.018859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1217.0, + "completions/mean_length": 572.7142944335938, + "completions/min_length": 309.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.314453125, + "entropy/min": 0.12255859375, + "epoch": 0.161, + "grad_norm": 1.218305374780841, + "kl": 0.1279296875, + "learning_rate": 1.8873768737185478e-06, + "loss": 0.0012988243252038956, + "memory(GiB)": 146.12, + "reward": 2.1747634410858154, + "reward_std": 0.13627514243125916, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4461142420768738, + "rewards/EvidenceHallucination/std": 0.3420378565788269, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.9865530133247375, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8855405449867249, + "rewards/VideoAccuracy/std": 0.18643184006214142, + "step": 161, + "train_speed(iter/s)": 0.01882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 873.0, + "completions/mean_length": 485.8333435058594, + "completions/min_length": 229.0, + "entropy/max": 1.421875, + "entropy/mean": 0.53125, + "entropy/min": 0.29296875, + "epoch": 0.162, + "grad_norm": 1.407074539091761, + "kl": 0.2021484375, + "learning_rate": 1.8859093629069056e-06, + "loss": 0.002040162682533264, + "memory(GiB)": 146.12, + "reward": 1.7553484439849854, + "reward_std": 0.35698962211608887, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4434085786342621, + "rewards/EvidenceHallucination/std": 0.3551959693431854, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 1.1699390411376953, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 162, + "train_speed(iter/s)": 0.018837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 684.0, + "completions/mean_length": 397.1428527832031, + "completions/min_length": 269.0, + "entropy/max": 1.046875, + "entropy/mean": 0.447265625, + "entropy/min": 0.25390625, + "epoch": 0.163, + "grad_norm": 1.1432921146706652, + "kl": 0.2099609375, + "learning_rate": 1.8844329309978143e-06, + "loss": 0.0021164161153137684, + "memory(GiB)": 146.12, + "reward": 1.4413707256317139, + "reward_std": 0.2937353849411011, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24317666888237, + "rewards/EvidenceHallucination/std": 0.36700868606567383, + "rewards/Evidence_Num_Record/mean": 2.761904716491699, + "rewards/Evidence_Num_Record/std": 0.5763435363769531, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3927353620529175, + "rewards/VideoAccuracy/std": 0.47045132517814636, + "step": 163, + "train_speed(iter/s)": 0.018824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1109.0, + "completions/mean_length": 437.0714416503906, + "completions/min_length": 277.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.373046875, + "entropy/min": 0.1748046875, + "epoch": 0.164, + "grad_norm": 1.0396406883407332, + "kl": 0.1826171875, + "learning_rate": 1.8829475928589268e-06, + "loss": 0.0020347093231976032, + "memory(GiB)": 146.12, + "reward": 1.7839725017547607, + "reward_std": 0.04425010085105896, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40866315364837646, + "rewards/EvidenceHallucination/std": 0.37608444690704346, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 1.2699053287506104, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5689064860343933, + "rewards/VideoAccuracy/std": 0.42913714051246643, + "step": 164, + "train_speed(iter/s)": 0.018841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1088.0, + "completions/mean_length": 554.1666870117188, + "completions/min_length": 250.0, + "entropy/max": 1.7421875, + "entropy/mean": 0.58203125, + "entropy/min": 0.1220703125, + "epoch": 0.165, + "grad_norm": 1.1222930350902727, + "kl": 0.154296875, + "learning_rate": 1.881453363447582e-06, + "loss": 0.0015880585415288806, + "memory(GiB)": 146.12, + "reward": 1.9694316387176514, + "reward_std": 0.30417174100875854, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5211167931556702, + "rewards/EvidenceHallucination/std": 0.3707961440086365, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 1.2195179462432861, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7985416054725647, + "rewards/VideoAccuracy/std": 0.37672126293182373, + "step": 165, + "train_speed(iter/s)": 0.018829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 613.0, + "completions/mean_length": 397.6190490722656, + "completions/min_length": 274.0, + "entropy/max": 0.5625, + "entropy/mean": 0.41796875, + "entropy/min": 0.28125, + "epoch": 0.166, + "grad_norm": 1.1794028567931532, + "kl": 0.2177734375, + "learning_rate": 1.8799502578106532e-06, + "loss": 0.0021678453776985407, + "memory(GiB)": 146.12, + "reward": 1.5557883977890015, + "reward_std": 0.0970194861292839, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35668614506721497, + "rewards/EvidenceHallucination/std": 0.39013928174972534, + "rewards/Evidence_Num_Record/mean": 3.0238096714019775, + "rewards/Evidence_Num_Record/std": 0.7485952973365784, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.484451025724411, + "rewards/VideoAccuracy/std": 0.49885839223861694, + "step": 166, + "train_speed(iter/s)": 0.018822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 668.0, + "completions/mean_length": 372.23809814453125, + "completions/min_length": 244.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.384765625, + "entropy/min": 0.263671875, + "epoch": 0.167, + "grad_norm": 1.2363471449646588, + "kl": 0.2197265625, + "learning_rate": 1.8784382910843975e-06, + "loss": 0.002421202138066292, + "memory(GiB)": 146.12, + "reward": 1.3132166862487793, + "reward_std": 0.1221097931265831, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14486762881278992, + "rewards/EvidenceHallucination/std": 0.319282203912735, + "rewards/Evidence_Num_Record/mean": 2.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.8125753402709961, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.21757635474205017, + "rewards/VideoAccuracy/std": 0.3917587995529175, + "step": 167, + "train_speed(iter/s)": 0.018835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 803.0, + "completions/mean_length": 514.0238037109375, + "completions/min_length": 317.0, + "entropy/max": 1.4375, + "entropy/mean": 0.421875, + "entropy/min": 0.1376953125, + "epoch": 0.168, + "grad_norm": 0.8942918327959201, + "kl": 0.1455078125, + "learning_rate": 1.8769174784943029e-06, + "loss": 0.0014676781138405204, + "memory(GiB)": 146.12, + "reward": 1.5616717338562012, + "reward_std": 0.07717376947402954, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14648951590061188, + "rewards/EvidenceHallucination/std": 0.30986353754997253, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 1.087624430656433, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.39904049038887024, + "rewards/VideoAccuracy/std": 0.4326857328414917, + "step": 168, + "train_speed(iter/s)": 0.018832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 562.0, + "completions/mean_length": 399.5714416503906, + "completions/min_length": 232.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.439453125, + "entropy/min": 0.283203125, + "epoch": 0.169, + "grad_norm": 1.160434657100405, + "kl": 0.259765625, + "learning_rate": 1.8753878353549355e-06, + "loss": 0.0026328868698328733, + "memory(GiB)": 146.12, + "reward": 1.3136122226715088, + "reward_std": 0.21220001578330994, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19901356101036072, + "rewards/EvidenceHallucination/std": 0.3090404272079468, + "rewards/Evidence_Num_Record/mean": 3.309523820877075, + "rewards/Evidence_Num_Record/std": 0.8111447691917419, + "rewards/Format/mean": 0.8333333730697632, + "rewards/Format/std": 0.37719547748565674, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496562242507935, + "step": 169, + "train_speed(iter/s)": 0.018843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 545.0, + "completions/mean_length": 374.8333435058594, + "completions/min_length": 278.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.431640625, + "entropy/min": 0.27734375, + "epoch": 0.17, + "grad_norm": 1.1330419994071816, + "kl": 0.2294921875, + "learning_rate": 1.873849377069785e-06, + "loss": 0.002298670820891857, + "memory(GiB)": 146.12, + "reward": 1.2922805547714233, + "reward_std": 0.29446443915367126, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22717873752117157, + "rewards/EvidenceHallucination/std": 0.3880119323730469, + "rewards/Evidence_Num_Record/mean": 2.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.7485953569412231, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.24684475362300873, + "rewards/VideoAccuracy/std": 0.41650208830833435, + "step": 170, + "train_speed(iter/s)": 0.018827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/mean_length": 486.90478515625, + "completions/min_length": 318.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.302734375, + "entropy/min": 0.10595703125, + "epoch": 0.171, + "grad_norm": 1.0696445418655214, + "kl": 0.1669921875, + "learning_rate": 1.8723021191311089e-06, + "loss": 0.0018870094791054726, + "memory(GiB)": 146.12, + "reward": 1.9103014469146729, + "reward_std": 0.09925421327352524, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3888092041015625, + "rewards/EvidenceHallucination/std": 0.4217150807380676, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.6373014450073242, + "rewards/VideoAccuracy/std": 0.4130142033100128, + "step": 171, + "train_speed(iter/s)": 0.018842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 769.0, + "completions/mean_length": 439.5952453613281, + "completions/min_length": 306.0, + "entropy/max": 1.8671875, + "entropy/mean": 0.51953125, + "entropy/min": 0.271484375, + "epoch": 0.172, + "grad_norm": 1.075234487382607, + "kl": 0.2373046875, + "learning_rate": 1.8707460771197771e-06, + "loss": 0.002409239998087287, + "memory(GiB)": 146.12, + "reward": 1.3555196523666382, + "reward_std": 0.20930641889572144, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22997884452342987, + "rewards/EvidenceHallucination/std": 0.3749256730079651, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.8611501455307007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 172, + "train_speed(iter/s)": 0.018847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 548.0, + "completions/mean_length": 384.0, + "completions/min_length": 249.0, + "entropy/max": 0.86328125, + "entropy/mean": 0.408203125, + "entropy/min": 0.275390625, + "epoch": 0.173, + "grad_norm": 1.2968746910553395, + "kl": 0.2451171875, + "learning_rate": 1.869181266705116e-06, + "loss": 0.002461865544319153, + "memory(GiB)": 146.12, + "reward": 1.5350407361984253, + "reward_std": 0.28135740756988525, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3513045310974121, + "rewards/EvidenceHallucination/std": 0.3848385810852051, + "rewards/Evidence_Num_Record/mean": 3.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.5500501394271851, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.46477967500686646, + "rewards/VideoAccuracy/std": 0.47883597016334534, + "step": 173, + "train_speed(iter/s)": 0.01886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 830.0, + "completions/mean_length": 440.3333435058594, + "completions/min_length": 290.0, + "entropy/max": 1.15625, + "entropy/mean": 0.482421875, + "entropy/min": 0.138671875, + "epoch": 0.174, + "grad_norm": 1.8101136368963897, + "kl": 0.2109375, + "learning_rate": 1.867607703644749e-06, + "loss": 0.0021419243421405554, + "memory(GiB)": 146.12, + "reward": 1.55290949344635, + "reward_std": 0.13344544172286987, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2585713565349579, + "rewards/EvidenceHallucination/std": 0.3755526840686798, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8781778812408447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.377385675907135, + "rewards/VideoAccuracy/std": 0.48580801486968994, + "step": 174, + "train_speed(iter/s)": 0.018877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 668.0, + "completions/mean_length": 499.5, + "completions/min_length": 339.0, + "entropy/max": 0.75, + "entropy/mean": 0.376953125, + "entropy/min": 0.1865234375, + "epoch": 0.175, + "grad_norm": 1.0080847323448974, + "kl": 0.1806640625, + "learning_rate": 1.8660254037844386e-06, + "loss": 0.0018150052055716515, + "memory(GiB)": 146.12, + "reward": 1.5558075904846191, + "reward_std": 0.2614729702472687, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2841332256793976, + "rewards/EvidenceHallucination/std": 0.380180686712265, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.8981203436851501, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.43231427669525146, + "rewards/VideoAccuracy/std": 0.4836465120315552, + "step": 175, + "train_speed(iter/s)": 0.018874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 548.0, + "completions/mean_length": 413.3333435058594, + "completions/min_length": 292.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.3984375, + "entropy/min": 0.255859375, + "epoch": 0.176, + "grad_norm": 1.3365246989631465, + "kl": 0.26171875, + "learning_rate": 1.8644343830579267e-06, + "loss": 0.0026401884388178587, + "memory(GiB)": 146.12, + "reward": 1.65059232711792, + "reward_std": 0.32872068881988525, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39835885167121887, + "rewards/EvidenceHallucination/std": 0.36213111877441406, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.49679574370384216, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5709205269813538, + "rewards/VideoAccuracy/std": 0.5004269480705261, + "step": 176, + "train_speed(iter/s)": 0.018869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 629.0, + "completions/mean_length": 434.66668701171875, + "completions/min_length": 254.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.4140625, + "entropy/min": 0.265625, + "epoch": 0.177, + "grad_norm": 1.1561926358233798, + "kl": 0.2373046875, + "learning_rate": 1.8628346574867744e-06, + "loss": 0.0023757475428283215, + "memory(GiB)": 146.12, + "reward": 1.5172730684280396, + "reward_std": 0.1766224354505539, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2606974244117737, + "rewards/EvidenceHallucination/std": 0.37932664155960083, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.800696611404419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.39846688508987427, + "rewards/VideoAccuracy/std": 0.5227741599082947, + "step": 177, + "train_speed(iter/s)": 0.018884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 763.0, + "completions/mean_length": 525.1666870117188, + "completions/min_length": 368.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.337890625, + "entropy/min": 0.1171875, + "epoch": 0.178, + "grad_norm": 1.2254625874796086, + "kl": 0.1533203125, + "learning_rate": 1.8612262431802006e-06, + "loss": 0.0015669530257582664, + "memory(GiB)": 146.12, + "reward": 2.0970797538757324, + "reward_std": 0.25572848320007324, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3758601248264313, + "rewards/EvidenceHallucination/std": 0.3398178815841675, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8785083889961243, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8885742425918579, + "rewards/VideoAccuracy/std": 0.3620957136154175, + "step": 178, + "train_speed(iter/s)": 0.018881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/mean_length": 500.4285888671875, + "completions/min_length": 279.0, + "entropy/max": 0.79296875, + "entropy/mean": 0.384765625, + "entropy/min": 0.154296875, + "epoch": 0.179, + "grad_norm": 1.261062645250815, + "kl": 0.24609375, + "learning_rate": 1.859609156334919e-06, + "loss": 0.0025176331400871277, + "memory(GiB)": 146.12, + "reward": 1.5994611978530884, + "reward_std": 0.20451365411281586, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37825754284858704, + "rewards/EvidenceHallucination/std": 0.38394975662231445, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.9959309101104736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.523809552192688, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 179, + "train_speed(iter/s)": 0.018878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 527.0, + "completions/mean_length": 370.4761962890625, + "completions/min_length": 280.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.38671875, + "entropy/min": 0.244140625, + "epoch": 0.18, + "grad_norm": 1.3646103318841316, + "kl": 0.263671875, + "learning_rate": 1.857983413234977e-06, + "loss": 0.0028454954735934734, + "memory(GiB)": 146.12, + "reward": 1.529546856880188, + "reward_std": 0.35319921374320984, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2201814204454422, + "rewards/EvidenceHallucination/std": 0.376852422952652, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.4722250998020172, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4855104684829712, + "rewards/VideoAccuracy/std": 0.46320217847824097, + "step": 180, + "train_speed(iter/s)": 0.018891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 773.0, + "completions/mean_length": 544.0, + "completions/min_length": 305.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.3359375, + "entropy/min": 0.125, + "epoch": 0.181, + "grad_norm": 1.1193707597104698, + "kl": 0.1611328125, + "learning_rate": 1.856349030251589e-06, + "loss": 0.0016444935463368893, + "memory(GiB)": 146.12, + "reward": 2.023716449737549, + "reward_std": 0.14536447823047638, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34102120995521545, + "rewards/EvidenceHallucination/std": 0.3933142125606537, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 1.1087760925292969, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8571428656578064, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.7840837836265564, + "rewards/VideoAccuracy/std": 0.4742104113101959, + "step": 181, + "train_speed(iter/s)": 0.018897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1135.0, + "completions/mean_length": 513.047607421875, + "completions/min_length": 332.0, + "entropy/max": 1.046875, + "entropy/mean": 0.484375, + "entropy/min": 0.2138671875, + "epoch": 0.182, + "grad_norm": 1.5659821882077638, + "kl": 0.279296875, + "learning_rate": 1.8547060238429735e-06, + "loss": 0.0028410868253558874, + "memory(GiB)": 146.12, + "reward": 1.740407943725586, + "reward_std": 0.23701772093772888, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4877532720565796, + "rewards/EvidenceHallucination/std": 0.3771217167377472, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.1746450662612915, + "rewards/Format/mean": 0.9047619104385376, + "rewards/Format/std": 0.2971017360687256, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6904761791229248, + "rewards/VideoAccuracy/std": 0.4679011106491089, + "step": 182, + "train_speed(iter/s)": 0.018861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 720.0, + "completions/mean_length": 409.0476379394531, + "completions/min_length": 243.0, + "entropy/max": 0.5625, + "entropy/mean": 0.345703125, + "entropy/min": 0.09326171875, + "epoch": 0.183, + "grad_norm": 1.1197768307253084, + "kl": 0.29296875, + "learning_rate": 1.853054410554187e-06, + "loss": 0.0029866299591958523, + "memory(GiB)": 146.12, + "reward": 1.4120022058486938, + "reward_std": 0.3056322932243347, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29936572909355164, + "rewards/EvidenceHallucination/std": 0.4119129180908203, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.7543909549713135, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3640337586402893, + "rewards/VideoAccuracy/std": 0.4702302813529968, + "step": 183, + "train_speed(iter/s)": 0.018871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1037.0, + "completions/mean_length": 517.8095092773438, + "completions/min_length": 321.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.390625, + "entropy/min": 0.1494140625, + "epoch": 0.184, + "grad_norm": 1.141835746927161, + "kl": 0.2421875, + "learning_rate": 1.8513942070169568e-06, + "loss": 0.0026467707939445972, + "memory(GiB)": 146.12, + "reward": 1.7807577848434448, + "reward_std": 0.2340150773525238, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31633201241493225, + "rewards/EvidenceHallucination/std": 0.37761422991752625, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.771516740322113, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5714285969734192, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.6032055020332336, + "rewards/VideoAccuracy/std": 0.5538917779922485, + "step": 184, + "train_speed(iter/s)": 0.018871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1120.0, + "completions/mean_length": 661.5952758789062, + "completions/min_length": 330.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.357421875, + "entropy/min": 0.109375, + "epoch": 0.185, + "grad_norm": 1.2075006581057381, + "kl": 0.1943359375, + "learning_rate": 1.8497254299495145e-06, + "loss": 0.001978288171812892, + "memory(GiB)": 146.12, + "reward": 1.7457631826400757, + "reward_std": 0.4265967011451721, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.2883176803588867, + "rewards/EvidenceHallucination/std": 0.31130462884902954, + "rewards/Evidence_Num_Record/mean": 4.5, + "rewards/Evidence_Num_Record/std": 1.5499804019927979, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.6428614854812622, + "rewards/VideoAccuracy/std": 0.4330059587955475, + "step": 185, + "train_speed(iter/s)": 0.018865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 652.0, + "completions/mean_length": 452.3333435058594, + "completions/min_length": 318.0, + "entropy/max": 1.2734375, + "entropy/mean": 0.42578125, + "entropy/min": 0.259765625, + "epoch": 0.186, + "grad_norm": 1.2800973114877825, + "kl": 0.2890625, + "learning_rate": 1.8480480961564257e-06, + "loss": 0.002903138054534793, + "memory(GiB)": 146.12, + "reward": 1.6066404581069946, + "reward_std": 0.30020588636398315, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35733169317245483, + "rewards/EvidenceHallucination/std": 0.3904092311859131, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.0348178148269653, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5351739525794983, + "rewards/VideoAccuracy/std": 0.49298569560050964, + "step": 186, + "train_speed(iter/s)": 0.018857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1105.0, + "completions/mean_length": 494.952392578125, + "completions/min_length": 324.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.421875, + "entropy/min": 0.244140625, + "epoch": 0.187, + "grad_norm": 1.237521066789628, + "kl": 0.26953125, + "learning_rate": 1.846362222528424e-06, + "loss": 0.0027416504453867674, + "memory(GiB)": 146.12, + "reward": 1.415924310684204, + "reward_std": 0.2676616311073303, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20467409491539001, + "rewards/EvidenceHallucination/std": 0.3532085418701172, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 2.046072006225586, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.32737040519714355, + "rewards/VideoAccuracy/std": 0.4514332413673401, + "step": 187, + "train_speed(iter/s)": 0.018878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 878.0, + "completions/mean_length": 571.3095092773438, + "completions/min_length": 321.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.33984375, + "entropy/min": 0.11767578125, + "epoch": 0.188, + "grad_norm": 1.1344193286449087, + "kl": 0.1787109375, + "learning_rate": 1.8446678260422384e-06, + "loss": 0.001828239532187581, + "memory(GiB)": 146.12, + "reward": 1.9192054271697998, + "reward_std": 0.2180202454328537, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40210607647895813, + "rewards/EvidenceHallucination/std": 0.36509448289871216, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 1.148902416229248, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.7149747610092163, + "rewards/VideoAccuracy/std": 0.42083168029785156, + "step": 188, + "train_speed(iter/s)": 0.018871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 762.0, + "completions/mean_length": 479.952392578125, + "completions/min_length": 328.0, + "entropy/max": 0.79296875, + "entropy/mean": 0.404296875, + "entropy/min": 0.2412109375, + "epoch": 0.189, + "grad_norm": 1.2312451315706627, + "kl": 0.294921875, + "learning_rate": 1.8429649237604214e-06, + "loss": 0.0029576425440609455, + "memory(GiB)": 146.12, + "reward": 1.6795845031738281, + "reward_std": 0.22131046652793884, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42173194885253906, + "rewards/EvidenceHallucination/std": 0.36969906091690063, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.064690351486206, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679577350616455, + "step": 189, + "train_speed(iter/s)": 0.018884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 833.0, + "completions/mean_length": 450.6428527832031, + "completions/min_length": 323.0, + "entropy/max": 1.0078125, + "entropy/mean": 0.3984375, + "entropy/min": 0.2431640625, + "epoch": 0.19, + "grad_norm": 1.2392294157062458, + "kl": 0.275390625, + "learning_rate": 1.8412535328311812e-06, + "loss": 0.002766430377960205, + "memory(GiB)": 146.12, + "reward": 1.594218373298645, + "reward_std": 0.17641466856002808, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.208940327167511, + "rewards/EvidenceHallucination/std": 0.34979137778282166, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 1.0135550498962402, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5524303913116455, + "rewards/VideoAccuracy/std": 0.48600202798843384, + "step": 190, + "train_speed(iter/s)": 0.018867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1143.0, + "completions/mean_length": 619.3333740234375, + "completions/min_length": 386.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.357421875, + "entropy/min": 0.12109375, + "epoch": 0.191, + "grad_norm": 1.1769789565441817, + "kl": 0.1806640625, + "learning_rate": 1.8395336704882047e-06, + "loss": 0.0018596196314319968, + "memory(GiB)": 146.12, + "reward": 2.0896453857421875, + "reward_std": 0.23886415362358093, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48446333408355713, + "rewards/EvidenceHallucination/std": 0.3222953677177429, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.494861364364624, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.7142857313156128, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.8498954772949219, + "rewards/VideoAccuracy/std": 0.36691805720329285, + "step": 191, + "train_speed(iter/s)": 0.018856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1011.0, + "completions/mean_length": 551.2619018554688, + "completions/min_length": 286.0, + "entropy/max": 0.625, + "entropy/mean": 0.42578125, + "entropy/min": 0.240234375, + "epoch": 0.192, + "grad_norm": 0.8012267253059627, + "kl": 0.265625, + "learning_rate": 1.8378053540504871e-06, + "loss": 0.0027068699710071087, + "memory(GiB)": 146.12, + "reward": 1.3803496360778809, + "reward_std": 0.007227580063045025, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2350817620754242, + "rewards/EvidenceHallucination/std": 0.3490041494369507, + "rewards/Evidence_Num_Record/mean": 5.023809432983398, + "rewards/Evidence_Num_Record/std": 1.918944001197815, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 192, + "train_speed(iter/s)": 0.018883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 506.0, + "completions/min_length": 308.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.40234375, + "entropy/min": 0.058349609375, + "epoch": 0.193, + "grad_norm": 0.6008917441507103, + "kl": 0.271484375, + "learning_rate": 1.8360686009221558e-06, + "loss": 0.002894636942073703, + "memory(GiB)": 146.12, + "reward": 1.1222654581069946, + "reward_std": 0.10150207579135895, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.10451966524124146, + "rewards/EvidenceHallucination/std": 0.28947871923446655, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.2337208986282349, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.11326626688241959, + "rewards/VideoAccuracy/std": 0.287361741065979, + "step": 193, + "train_speed(iter/s)": 0.01886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1009.0, + "completions/mean_length": 521.357177734375, + "completions/min_length": 284.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.373046875, + "entropy/min": 0.1318359375, + "epoch": 0.194, + "grad_norm": 1.2395668477965114, + "kl": 0.25, + "learning_rate": 1.8343234285922952e-06, + "loss": 0.0025251524057239294, + "memory(GiB)": 146.12, + "reward": 1.9593502283096313, + "reward_std": 0.19612763822078705, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42780205607414246, + "rewards/EvidenceHallucination/std": 0.3869655430316925, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.5897464752197266, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5476190447807312, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.7642660737037659, + "rewards/VideoAccuracy/std": 0.501839816570282, + "step": 194, + "train_speed(iter/s)": 0.018881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/mean_length": 682.5, + "completions/min_length": 403.0, + "entropy/max": 0.7109375, + "entropy/mean": 0.349609375, + "entropy/min": 0.1513671875, + "epoch": 0.195, + "grad_norm": 1.0347054760255026, + "kl": 0.19140625, + "learning_rate": 1.832569854634771e-06, + "loss": 0.001986590214073658, + "memory(GiB)": 146.12, + "reward": 1.6080275774002075, + "reward_std": 0.26118576526641846, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30487632751464844, + "rewards/EvidenceHallucination/std": 0.3554747700691223, + "rewards/Evidence_Num_Record/mean": 5.738095283508301, + "rewards/Evidence_Num_Record/std": 2.4099340438842773, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.48038557171821594, + "rewards/VideoAccuracy/std": 0.43697768449783325, + "step": 195, + "train_speed(iter/s)": 0.018862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 746.0, + "completions/mean_length": 467.66668701171875, + "completions/min_length": 281.0, + "entropy/max": 0.953125, + "entropy/mean": 0.376953125, + "entropy/min": 0.197265625, + "epoch": 0.196, + "grad_norm": 1.2986731761415105, + "kl": 0.298828125, + "learning_rate": 1.8308078967080545e-06, + "loss": 0.003042693017050624, + "memory(GiB)": 146.12, + "reward": 1.410247802734375, + "reward_std": 0.33110353350639343, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27933555841445923, + "rewards/EvidenceHallucination/std": 0.38819193840026855, + "rewards/Evidence_Num_Record/mean": 4.833333492279053, + "rewards/Evidence_Num_Record/std": 1.2477622032165527, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.35438072681427, + "rewards/VideoAccuracy/std": 0.4773140847682953, + "step": 196, + "train_speed(iter/s)": 0.018854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 763.0, + "completions/mean_length": 456.40478515625, + "completions/min_length": 303.0, + "entropy/max": 1.015625, + "entropy/mean": 0.423828125, + "entropy/min": 0.208984375, + "epoch": 0.197, + "grad_norm": 1.3153179989585684, + "kl": 0.2890625, + "learning_rate": 1.8290375725550415e-06, + "loss": 0.00291400752030313, + "memory(GiB)": 146.12, + "reward": 1.5776203870773315, + "reward_std": 0.3823457956314087, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29908379912376404, + "rewards/EvidenceHallucination/std": 0.384405255317688, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.2003192901611328, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.46790117025375366, + "rewards/VideoAccuracy/mean": 0.4558989405632019, + "rewards/VideoAccuracy/std": 0.5101503729820251, + "step": 197, + "train_speed(iter/s)": 0.018864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 875.0, + "completions/mean_length": 540.5952758789062, + "completions/min_length": 364.0, + "entropy/max": 1.6953125, + "entropy/mean": 0.435546875, + "entropy/min": 0.138671875, + "epoch": 0.198, + "grad_norm": 1.1143950762270292, + "kl": 0.203125, + "learning_rate": 1.827258900002877e-06, + "loss": 0.0020716446451842785, + "memory(GiB)": 146.12, + "reward": 2.1092474460601807, + "reward_std": 0.19651727378368378, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42437508702278137, + "rewards/EvidenceHallucination/std": 0.40561822056770325, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.09480881690979, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5952380895614624, + "rewards/HonestTime/std": 0.49679574370384216, + "rewards/VideoAccuracy/mean": 0.9053246974945068, + "rewards/VideoAccuracy/std": 0.4163197875022888, + "step": 198, + "train_speed(iter/s)": 0.018861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 721.0, + "completions/mean_length": 471.71429443359375, + "completions/min_length": 322.0, + "entropy/max": 0.75390625, + "entropy/mean": 0.390625, + "entropy/min": 0.279296875, + "epoch": 0.199, + "grad_norm": 1.1737449138228981, + "kl": 0.291015625, + "learning_rate": 1.825471896962774e-06, + "loss": 0.0029436303302645683, + "memory(GiB)": 146.12, + "reward": 1.9432400465011597, + "reward_std": 0.023910468444228172, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5495333075523376, + "rewards/EvidenceHallucination/std": 0.30792468786239624, + "rewards/Evidence_Num_Record/mean": 4.547619342803955, + "rewards/Evidence_Num_Record/std": 1.06387197971344, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8333333730697632, + "rewards/VideoAccuracy/std": 0.37719547748565674, + "step": 199, + "train_speed(iter/s)": 0.018846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 575.0, + "completions/mean_length": 398.6190490722656, + "completions/min_length": 303.0, + "entropy/max": 0.67578125, + "entropy/mean": 0.37109375, + "entropy/min": 0.2041015625, + "epoch": 0.2, + "grad_norm": 1.2972484508452815, + "kl": 0.306640625, + "learning_rate": 1.8236765814298327e-06, + "loss": 0.00309559958986938, + "memory(GiB)": 146.12, + "reward": 1.3546700477600098, + "reward_std": 0.40779995918273926, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24729494750499725, + "rewards/EvidenceHallucination/std": 0.3658376932144165, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7213357090950012, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.30521106719970703, + "rewards/VideoAccuracy/std": 0.4353443384170532, + "step": 200, + "train_speed(iter/s)": 0.018843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1003.0, + "completions/mean_length": 479.4761962890625, + "completions/min_length": 324.0, + "entropy/max": 0.65625, + "entropy/mean": 0.349609375, + "entropy/min": 0.095703125, + "epoch": 0.201, + "grad_norm": 1.1284051039205776, + "kl": 0.20703125, + "learning_rate": 1.821872971482861e-06, + "loss": 0.002093291375786066, + "memory(GiB)": 146.12, + "reward": 1.8066730499267578, + "reward_std": 0.13066262006759644, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3956207036972046, + "rewards/EvidenceHallucination/std": 0.37852877378463745, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.6559829115867615, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.5323108434677124, + "rewards/VideoAccuracy/std": 0.35995954275131226, + "step": 201, + "train_speed(iter/s)": 0.018768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 819.0, + "completions/mean_length": 474.6190490722656, + "completions/min_length": 293.0, + "entropy/max": 1.28125, + "entropy/mean": 0.51953125, + "entropy/min": 0.30859375, + "epoch": 0.202, + "grad_norm": 1.3102689029487984, + "kl": 0.294921875, + "learning_rate": 1.8200610852841911e-06, + "loss": 0.0029544036369770765, + "memory(GiB)": 146.12, + "reward": 1.6257050037384033, + "reward_std": 0.10190405696630478, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.390429824590683, + "rewards/EvidenceHallucination/std": 0.378105491399765, + "rewards/Evidence_Num_Record/mean": 4.833333492279053, + "rewards/Evidence_Num_Record/std": 1.5759884119033813, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5476190447807312, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 202, + "train_speed(iter/s)": 0.018772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 509.0, + "completions/mean_length": 394.952392578125, + "completions/min_length": 252.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.4140625, + "entropy/min": 0.306640625, + "epoch": 0.203, + "grad_norm": 1.410439577448358, + "kl": 0.2890625, + "learning_rate": 1.8182409410794966e-06, + "loss": 0.0028976770117878914, + "memory(GiB)": 146.12, + "reward": 1.54341459274292, + "reward_std": 0.264932245016098, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31481143832206726, + "rewards/EvidenceHallucination/std": 0.36222589015960693, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.5942034721374512, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4804523289203644, + "rewards/VideoAccuracy/std": 0.4593258798122406, + "step": 203, + "train_speed(iter/s)": 0.018778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 446.66668701171875, + "completions/min_length": 242.0, + "entropy/max": 1.125, + "entropy/mean": 0.39453125, + "entropy/min": 0.049560546875, + "epoch": 0.204, + "grad_norm": 1.333677043718328, + "kl": 0.259765625, + "learning_rate": 1.8164125571976096e-06, + "loss": 0.002771096769720316, + "memory(GiB)": 146.12, + "reward": 1.766880989074707, + "reward_std": 0.2664657533168793, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3919958472251892, + "rewards/EvidenceHallucination/std": 0.3605327010154724, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.8039536476135254, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.5714285969734192, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.5861007571220398, + "rewards/VideoAccuracy/std": 0.47626814246177673, + "step": 204, + "train_speed(iter/s)": 0.018727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 784.0, + "completions/mean_length": 503.5238037109375, + "completions/min_length": 311.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.39453125, + "entropy/min": 0.11669921875, + "epoch": 0.205, + "grad_norm": 1.2386194810672908, + "kl": 0.22265625, + "learning_rate": 1.8145759520503357e-06, + "loss": 0.0022671599872410297, + "memory(GiB)": 146.12, + "reward": 2.2045955657958984, + "reward_std": 0.07308042049407959, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6259084343910217, + "rewards/EvidenceHallucination/std": 0.24700582027435303, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.0581248998641968, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 1.0175089836120605, + "rewards/VideoAccuracy/std": 0.12256230413913727, + "step": 205, + "train_speed(iter/s)": 0.01874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 582.0, + "completions/mean_length": 379.952392578125, + "completions/min_length": 211.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.388671875, + "entropy/min": 0.24609375, + "epoch": 0.206, + "grad_norm": 1.027901011642713, + "kl": 0.2890625, + "learning_rate": 1.812731144132268e-06, + "loss": 0.0029317340813577175, + "memory(GiB)": 146.12, + "reward": 1.563454031944275, + "reward_std": 0.01673251762986183, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3172701895236969, + "rewards/EvidenceHallucination/std": 0.3400120735168457, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 1.000870704650879, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5, + "rewards/VideoAccuracy/std": 0.5060608386993408, + "step": 206, + "train_speed(iter/s)": 0.018738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 503.0, + "completions/mean_length": 398.16668701171875, + "completions/min_length": 237.0, + "entropy/max": 0.5625, + "entropy/mean": 0.41796875, + "entropy/min": 0.22265625, + "epoch": 0.207, + "grad_norm": 1.4431055373612067, + "kl": 0.291015625, + "learning_rate": 1.8108781520206018e-06, + "loss": 0.0029396088793873787, + "memory(GiB)": 146.12, + "reward": 1.3641424179077148, + "reward_std": 0.3134014308452606, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18790483474731445, + "rewards/EvidenceHallucination/std": 0.3463141620159149, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8502919673919678, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.46790117025375366, + "rewards/VideoAccuracy/mean": 0.2646566331386566, + "rewards/VideoAccuracy/std": 0.4310843050479889, + "step": 207, + "train_speed(iter/s)": 0.018751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 783.0, + "completions/mean_length": 452.6190490722656, + "completions/min_length": 304.0, + "entropy/max": 0.84375, + "entropy/mean": 0.345703125, + "entropy/min": 0.1494140625, + "epoch": 0.208, + "grad_norm": 0.9803073502002945, + "kl": 0.2060546875, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.0020949551835656166, + "memory(GiB)": 146.12, + "reward": 1.8597743511199951, + "reward_std": 0.1580304503440857, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43828946352005005, + "rewards/EvidenceHallucination/std": 0.35627278685569763, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.7593780159950256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.6435449719429016, + "rewards/VideoAccuracy/std": 0.5075361132621765, + "step": 208, + "train_speed(iter/s)": 0.018752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 615.0, + "completions/mean_length": 419.3333435058594, + "completions/min_length": 227.0, + "entropy/max": 0.953125, + "entropy/mean": 0.447265625, + "entropy/min": 0.28125, + "epoch": 0.209, + "grad_norm": 0.8107368307761447, + "kl": 0.291015625, + "learning_rate": 1.8071476899371413e-06, + "loss": 0.0029117565136402845, + "memory(GiB)": 146.12, + "reward": 1.3520643711090088, + "reward_std": 0.07225346565246582, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21270343661308289, + "rewards/EvidenceHallucination/std": 0.3311234712600708, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9997096061706543, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 209, + "train_speed(iter/s)": 0.018737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 506.0, + "completions/mean_length": 389.952392578125, + "completions/min_length": 297.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.42578125, + "entropy/min": 0.26171875, + "epoch": 0.21, + "grad_norm": 1.3087588124307798, + "kl": 0.296875, + "learning_rate": 1.8052702575310586e-06, + "loss": 0.002977391704916954, + "memory(GiB)": 146.12, + "reward": 1.514713168144226, + "reward_std": 0.31183916330337524, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34595897793769836, + "rewards/EvidenceHallucination/std": 0.39453673362731934, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.6270147562026978, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4455213248729706, + "rewards/VideoAccuracy/std": 0.4912121891975403, + "step": 210, + "train_speed(iter/s)": 0.018747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 945.0, + "completions/mean_length": 507.5952453613281, + "completions/min_length": 297.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.361328125, + "entropy/min": 0.1650390625, + "epoch": 0.211, + "grad_norm": 1.2637694787988318, + "kl": 0.2041015625, + "learning_rate": 1.8033847160624225e-06, + "loss": 0.0020564354490488768, + "memory(GiB)": 146.12, + "reward": 2.2377514839172363, + "reward_std": 0.20628312230110168, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40893810987472534, + "rewards/EvidenceHallucination/std": 0.31318873167037964, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9559638500213623, + "rewards/VideoAccuracy/std": 0.4015641510486603, + "step": 211, + "train_speed(iter/s)": 0.018744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1242.0, + "completions/mean_length": 474.71429443359375, + "completions/min_length": 273.0, + "entropy/max": 1.171875, + "entropy/mean": 0.474609375, + "entropy/min": 0.1923828125, + "epoch": 0.212, + "grad_norm": 0.9626542864618389, + "kl": 0.28515625, + "learning_rate": 1.801491084518615e-06, + "loss": 0.0029152999632060528, + "memory(GiB)": 146.12, + "reward": 1.4065104722976685, + "reward_std": 0.07577715069055557, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24683839082717896, + "rewards/EvidenceHallucination/std": 0.34258249402046204, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 2.2638142108917236, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496562242507935, + "step": 212, + "train_speed(iter/s)": 0.018742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 723.0, + "completions/mean_length": 386.3333435058594, + "completions/min_length": 282.0, + "entropy/max": 0.8359375, + "entropy/mean": 0.41796875, + "entropy/min": 0.220703125, + "epoch": 0.213, + "grad_norm": 1.3631479721595794, + "kl": 0.3203125, + "learning_rate": 1.7995893819684848e-06, + "loss": 0.003235860262066126, + "memory(GiB)": 146.12, + "reward": 1.5252161026000977, + "reward_std": 0.17157170176506042, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3588860034942627, + "rewards/EvidenceHallucination/std": 0.4134328067302704, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.8968262076377869, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4534388482570648, + "rewards/VideoAccuracy/std": 0.4595082998275757, + "step": 213, + "train_speed(iter/s)": 0.018735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 705.0, + "completions/mean_length": 408.71429443359375, + "completions/min_length": 225.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.35546875, + "entropy/min": 0.11865234375, + "epoch": 0.214, + "grad_norm": 1.1554583596717818, + "kl": 0.28515625, + "learning_rate": 1.7976796275621553e-06, + "loss": 0.0030635735020041466, + "memory(GiB)": 146.12, + "reward": 1.7635283470153809, + "reward_std": 0.20307080447673798, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35455483198165894, + "rewards/EvidenceHallucination/std": 0.3721371293067932, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.7730206847190857, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.5688078999519348, + "rewards/VideoAccuracy/std": 0.5657344460487366, + "step": 214, + "train_speed(iter/s)": 0.018742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 931.0, + "completions/mean_length": 519.6428833007812, + "completions/min_length": 308.0, + "entropy/max": 0.92578125, + "entropy/mean": 0.41015625, + "entropy/min": 0.142578125, + "epoch": 0.215, + "grad_norm": 0.7731173289573026, + "kl": 0.2353515625, + "learning_rate": 1.795761840530832e-06, + "loss": 0.002380756661295891, + "memory(GiB)": 146.12, + "reward": 1.489245891571045, + "reward_std": 0.04518473148345947, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2751462161540985, + "rewards/EvidenceHallucination/std": 0.3985624313354492, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.1526870727539062, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.367549866437912, + "rewards/VideoAccuracy/std": 0.47184836864471436, + "step": 215, + "train_speed(iter/s)": 0.01874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 567.0, + "completions/mean_length": 389.0714416503906, + "completions/min_length": 298.0, + "entropy/max": 0.515625, + "entropy/mean": 0.421875, + "entropy/min": 0.265625, + "epoch": 0.216, + "grad_norm": 1.1110957828022496, + "kl": 0.330078125, + "learning_rate": 1.7938360401866094e-06, + "loss": 0.0033316153567284346, + "memory(GiB)": 146.12, + "reward": 1.5227714776992798, + "reward_std": 0.148939311504364, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35195261240005493, + "rewards/EvidenceHallucination/std": 0.3986785113811493, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.8594626188278198, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4523809552192688, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 216, + "train_speed(iter/s)": 0.018743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 670.0, + "completions/mean_length": 392.4761962890625, + "completions/min_length": 282.0, + "entropy/max": 0.82421875, + "entropy/mean": 0.4609375, + "entropy/min": 0.302734375, + "epoch": 0.217, + "grad_norm": 0.978112983100945, + "kl": 0.314453125, + "learning_rate": 1.791902245922275e-06, + "loss": 0.0031728388275951147, + "memory(GiB)": 146.12, + "reward": 1.2725377082824707, + "reward_std": 0.15308094024658203, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.12878981232643127, + "rewards/EvidenceHallucination/std": 0.2983783185482025, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.8570944666862488, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.46790117025375366, + "rewards/VideoAccuracy/mean": 0.18487496674060822, + "rewards/VideoAccuracy/std": 0.4087332785129547, + "step": 217, + "train_speed(iter/s)": 0.018757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 791.0, + "completions/mean_length": 538.7142944335938, + "completions/min_length": 359.0, + "entropy/max": 0.890625, + "entropy/mean": 0.376953125, + "entropy/min": 0.1630859375, + "epoch": 0.218, + "grad_norm": 1.1056095534057395, + "kl": 0.2060546875, + "learning_rate": 1.789960477211116e-06, + "loss": 0.0020709068048745394, + "memory(GiB)": 146.12, + "reward": 1.8567214012145996, + "reward_std": 0.18893079459667206, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31860214471817017, + "rewards/EvidenceHallucination/std": 0.3428250849246979, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 0.9856696724891663, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074110031128, + "rewards/VideoAccuracy/mean": 0.6691913604736328, + "rewards/VideoAccuracy/std": 0.4289722144603729, + "step": 218, + "train_speed(iter/s)": 0.018745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 617.0, + "completions/mean_length": 412.3809509277344, + "completions/min_length": 282.0, + "entropy/max": 1.2890625, + "entropy/mean": 0.51953125, + "entropy/min": 0.291015625, + "epoch": 0.219, + "grad_norm": 1.4397652540632981, + "kl": 0.330078125, + "learning_rate": 1.7880107536067217e-06, + "loss": 0.0033206443767994642, + "memory(GiB)": 146.12, + "reward": 1.8000737428665161, + "reward_std": 0.2137765884399414, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5479874014854431, + "rewards/EvidenceHallucination/std": 0.3887883722782135, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9997095465660095, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6904761791229248, + "rewards/VideoAccuracy/std": 0.4679011106491089, + "step": 219, + "train_speed(iter/s)": 0.018754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 439.0, + "completions/mean_length": 357.7857360839844, + "completions/min_length": 253.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.462890625, + "entropy/min": 0.333984375, + "epoch": 0.22, + "grad_norm": 0.9903506450991835, + "kl": 0.330078125, + "learning_rate": 1.7860530947427874e-06, + "loss": 0.0032826876267790794, + "memory(GiB)": 146.12, + "reward": 1.127616047859192, + "reward_std": 0.21693232655525208, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.07094395905733109, + "rewards/EvidenceHallucination/std": 0.22947509586811066, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.6502032279968262, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.11342722177505493, + "rewards/VideoAccuracy/std": 0.28198370337486267, + "step": 220, + "train_speed(iter/s)": 0.018789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 684.0, + "completions/mean_length": 436.4285888671875, + "completions/min_length": 331.0, + "entropy/max": 0.671875, + "entropy/mean": 0.3359375, + "entropy/min": 0.1513671875, + "epoch": 0.221, + "grad_norm": 1.197858218367104, + "kl": 0.2138671875, + "learning_rate": 1.7840875203329158e-06, + "loss": 0.002129746600985527, + "memory(GiB)": 146.12, + "reward": 2.172039747238159, + "reward_std": 0.12870250642299652, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4757545590400696, + "rewards/EvidenceHallucination/std": 0.35233354568481445, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.6921662092208862, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8816505670547485, + "rewards/VideoAccuracy/std": 0.38467472791671753, + "step": 221, + "train_speed(iter/s)": 0.018781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/mean_length": 442.66668701171875, + "completions/min_length": 281.0, + "entropy/max": 1.2890625, + "entropy/mean": 0.490234375, + "entropy/min": 0.255859375, + "epoch": 0.222, + "grad_norm": 1.1591322108226676, + "kl": 0.3125, + "learning_rate": 1.7821140501704192e-06, + "loss": 0.0031443522311747074, + "memory(GiB)": 146.12, + "reward": 1.5666881799697876, + "reward_std": 0.1563645899295807, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33344078063964844, + "rewards/EvidenceHallucination/std": 0.3708530366420746, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.2230842113494873, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5, + "rewards/VideoAccuracy/std": 0.506060779094696, + "step": 222, + "train_speed(iter/s)": 0.018755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 628.0, + "completions/mean_length": 379.3809509277344, + "completions/min_length": 228.0, + "entropy/max": 1.0625, + "entropy/mean": 0.458984375, + "entropy/min": 0.26953125, + "epoch": 0.223, + "grad_norm": 1.1508011621624752, + "kl": 0.353515625, + "learning_rate": 1.7801327041281207e-06, + "loss": 0.003560734912753105, + "memory(GiB)": 146.12, + "reward": 1.4059683084487915, + "reward_std": 0.232594296336174, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2645784020423889, + "rewards/EvidenceHallucination/std": 0.3644518256187439, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.7265497446060181, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3530525863170624, + "rewards/VideoAccuracy/std": 0.46768462657928467, + "step": 223, + "train_speed(iter/s)": 0.018755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 634.0, + "completions/mean_length": 371.21429443359375, + "completions/min_length": 238.0, + "entropy/max": 0.84765625, + "entropy/mean": 0.416015625, + "entropy/min": 0.12353515625, + "epoch": 0.224, + "grad_norm": 1.2817648932774737, + "kl": 0.302734375, + "learning_rate": 1.7781435021581525e-06, + "loss": 0.003047728445380926, + "memory(GiB)": 146.12, + "reward": 1.99153470993042, + "reward_std": 0.16718348860740662, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5285584926605225, + "rewards/EvidenceHallucination/std": 0.41436928510665894, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.8281487822532654, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.7572515606880188, + "rewards/VideoAccuracy/std": 0.5330286622047424, + "step": 224, + "train_speed(iter/s)": 0.018762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 684.0, + "completions/mean_length": 475.952392578125, + "completions/min_length": 315.0, + "entropy/max": 0.6875, + "entropy/mean": 0.396484375, + "entropy/min": 0.12353515625, + "epoch": 0.225, + "grad_norm": 1.1873840762665278, + "kl": 0.2373046875, + "learning_rate": 1.7761464642917567e-06, + "loss": 0.0024001363199204206, + "memory(GiB)": 146.12, + "reward": 1.6954164505004883, + "reward_std": 0.313042014837265, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4449019730091095, + "rewards/EvidenceHallucination/std": 0.4287704825401306, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.2308934926986694, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5397694706916809, + "rewards/VideoAccuracy/std": 0.4395271837711334, + "step": 225, + "train_speed(iter/s)": 0.018754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 434.26190185546875, + "completions/min_length": 307.0, + "entropy/max": 2.375, + "entropy/mean": 0.58984375, + "entropy/min": 0.1044921875, + "epoch": 0.226, + "grad_norm": 1.373347458456585, + "kl": 0.333984375, + "learning_rate": 1.7741416106390826e-06, + "loss": 0.0035560843534767628, + "memory(GiB)": 146.12, + "reward": 1.257364273071289, + "reward_std": 0.19904689490795135, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20269151031970978, + "rewards/EvidenceHallucination/std": 0.36894580721855164, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.6115421056747437, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.240635484457016, + "rewards/VideoAccuracy/std": 0.4220869541168213, + "step": 226, + "train_speed(iter/s)": 0.018727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07142857142857142, + "completions/max_length": 2625.0, + "completions/mean_length": 534.2142944335938, + "completions/min_length": 309.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.376953125, + "entropy/min": 0.04052734375, + "epoch": 0.227, + "grad_norm": 1.2193637822754348, + "kl": 0.306640625, + "learning_rate": 1.7721289613889834e-06, + "loss": 0.003318554488942027, + "memory(GiB)": 146.12, + "reward": 1.5782241821289062, + "reward_std": 0.22096966207027435, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2946185767650604, + "rewards/EvidenceHallucination/std": 0.3401840925216675, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 0.9047619104385376, + "rewards/Format/std": 0.297101765871048, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5002529621124268, + "rewards/VideoAccuracy/std": 0.5846176743507385, + "step": 227, + "train_speed(iter/s)": 0.018701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 723.0, + "completions/mean_length": 497.9285888671875, + "completions/min_length": 328.0, + "entropy/max": 1.2265625, + "entropy/mean": 0.380859375, + "entropy/min": 0.140625, + "epoch": 0.228, + "grad_norm": 1.2673203843836767, + "kl": 0.2080078125, + "learning_rate": 1.7701085368088155e-06, + "loss": 0.002099959645420313, + "memory(GiB)": 146.12, + "reward": 1.8202496767044067, + "reward_std": 0.2765672206878662, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2655429244041443, + "rewards/EvidenceHallucination/std": 0.370466411113739, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 0.8035923838615417, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6338077187538147, + "rewards/VideoAccuracy/std": 0.40736258029937744, + "step": 228, + "train_speed(iter/s)": 0.018703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 627.0, + "completions/mean_length": 400.8571472167969, + "completions/min_length": 269.0, + "entropy/max": 0.75, + "entropy/mean": 0.443359375, + "entropy/min": 0.263671875, + "epoch": 0.229, + "grad_norm": 1.2111830048331047, + "kl": 0.345703125, + "learning_rate": 1.7680803572442319e-06, + "loss": 0.003480810672044754, + "memory(GiB)": 146.12, + "reward": 1.5204336643218994, + "reward_std": 0.1846160888671875, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3402631878852844, + "rewards/EvidenceHallucination/std": 0.3967270851135254, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.8781777620315552, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4523809552192688, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 229, + "train_speed(iter/s)": 0.018701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 512.0, + "completions/mean_length": 372.76190185546875, + "completions/min_length": 284.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.4140625, + "entropy/min": 0.2578125, + "epoch": 0.23, + "grad_norm": 1.0512029932846734, + "kl": 0.337890625, + "learning_rate": 1.766044443118978e-06, + "loss": 0.0034035081043839455, + "memory(GiB)": 146.12, + "reward": 1.1633344888687134, + "reward_std": 0.17522874474525452, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.09813331812620163, + "rewards/EvidenceHallucination/std": 0.25174546241760254, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.532345175743103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.1437077820301056, + "rewards/VideoAccuracy/std": 0.34388238191604614, + "step": 230, + "train_speed(iter/s)": 0.018737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 691.0, + "completions/mean_length": 464.1190490722656, + "completions/min_length": 252.0, + "entropy/max": 0.439453125, + "entropy/mean": 0.30078125, + "entropy/min": 0.15234375, + "epoch": 0.231, + "grad_norm": 1.1926249390501715, + "kl": 0.2158203125, + "learning_rate": 1.7640008149346866e-06, + "loss": 0.0021739723160862923, + "memory(GiB)": 146.12, + "reward": 2.4249696731567383, + "reward_std": 0.18014255166053772, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5357918739318848, + "rewards/EvidenceHallucination/std": 0.38135936856269836, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.7593780755996704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 1.1273351907730103, + "rewards/VideoAccuracy/std": 0.4719649851322174, + "step": 231, + "train_speed(iter/s)": 0.018719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 924.0, + "completions/mean_length": 440.5, + "completions/min_length": 314.0, + "entropy/max": 1.609375, + "entropy/mean": 0.52734375, + "entropy/min": 0.2734375, + "epoch": 0.232, + "grad_norm": 1.046542137226126, + "kl": 0.314453125, + "learning_rate": 1.761949493270671e-06, + "loss": 0.0032124067656695843, + "memory(GiB)": 146.12, + "reward": 1.3601796627044678, + "reward_std": 0.2623414993286133, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2532789707183838, + "rewards/EvidenceHallucination/std": 0.392877995967865, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 0.8430904150009155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011106491089, + "step": 232, + "train_speed(iter/s)": 0.018726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 420.0, + "completions/min_length": 208.0, + "entropy/max": 0.796875, + "entropy/mean": 0.458984375, + "entropy/min": 0.05419921875, + "epoch": 0.233, + "grad_norm": 1.121574397959357, + "kl": 0.314453125, + "learning_rate": 1.759890498783717e-06, + "loss": 0.003404829418286681, + "memory(GiB)": 146.12, + "reward": 1.3430793285369873, + "reward_std": 0.20950570702552795, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22730125486850739, + "rewards/EvidenceHallucination/std": 0.37339580059051514, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.7948732376098633, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 233, + "train_speed(iter/s)": 0.018683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 535.0, + "completions/mean_length": 393.23809814453125, + "completions/min_length": 316.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.38671875, + "entropy/min": 0.255859375, + "epoch": 0.234, + "grad_norm": 1.4538794911698656, + "kl": 0.322265625, + "learning_rate": 1.7578238522078768e-06, + "loss": 0.0032620998099446297, + "memory(GiB)": 146.12, + "reward": 1.8541812896728516, + "reward_std": 0.2292259782552719, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3325042128562927, + "rewards/EvidenceHallucination/std": 0.40219464898109436, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.5902813673019409, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6876804232597351, + "rewards/VideoAccuracy/std": 0.4814368188381195, + "step": 234, + "train_speed(iter/s)": 0.01869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 611.0, + "completions/mean_length": 427.19049072265625, + "completions/min_length": 339.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.44921875, + "entropy/min": 0.1865234375, + "epoch": 0.235, + "grad_norm": 1.1149482972182843, + "kl": 0.255859375, + "learning_rate": 1.7557495743542582e-06, + "loss": 0.0025807444471865892, + "memory(GiB)": 146.12, + "reward": 1.8418928384780884, + "reward_std": 0.12823235988616943, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5464540123939514, + "rewards/EvidenceHallucination/std": 0.4399447441101074, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.9323829412460327, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6659352779388428, + "rewards/VideoAccuracy/std": 0.5348252654075623, + "step": 235, + "train_speed(iter/s)": 0.018687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 515.0, + "completions/mean_length": 341.6190490722656, + "completions/min_length": 239.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.392578125, + "entropy/min": 0.27734375, + "epoch": 0.236, + "grad_norm": 1.5263174226155418, + "kl": 0.34375, + "learning_rate": 1.7536676861108164e-06, + "loss": 0.0034528947435319424, + "memory(GiB)": 146.12, + "reward": 1.6555285453796387, + "reward_std": 0.21378569304943085, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.435823529958725, + "rewards/EvidenceHallucination/std": 0.41382157802581787, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.607731819152832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5683638453483582, + "rewards/VideoAccuracy/std": 0.4869895875453949, + "step": 236, + "train_speed(iter/s)": 0.018676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 517.0, + "completions/mean_length": 373.9285888671875, + "completions/min_length": 263.0, + "entropy/max": 0.734375, + "entropy/mean": 0.40234375, + "entropy/min": 0.2265625, + "epoch": 0.237, + "grad_norm": 1.2119485357062756, + "kl": 0.33203125, + "learning_rate": 1.7515782084421423e-06, + "loss": 0.0033400084357708693, + "memory(GiB)": 146.12, + "reward": 1.6784098148345947, + "reward_std": 0.1474025994539261, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31832629442214966, + "rewards/EvidenceHallucination/std": 0.3810774087905884, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.4371005594730377, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.5671253204345703, + "rewards/VideoAccuracy/std": 0.5303839445114136, + "step": 237, + "train_speed(iter/s)": 0.018691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1054.0, + "completions/mean_length": 481.7857360839844, + "completions/min_length": 320.0, + "entropy/max": 1.2890625, + "entropy/mean": 0.40234375, + "entropy/min": 0.1484375, + "epoch": 0.238, + "grad_norm": 1.307395098001326, + "kl": 0.23828125, + "learning_rate": 1.749481162389254e-06, + "loss": 0.0023901357781141996, + "memory(GiB)": 146.12, + "reward": 2.0823862552642822, + "reward_std": 0.25942322611808777, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3701555132865906, + "rewards/EvidenceHallucination/std": 0.4010622501373291, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.916046142578125, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8750216364860535, + "rewards/VideoAccuracy/std": 0.4637349247932434, + "step": 238, + "train_speed(iter/s)": 0.018631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 596.0, + "completions/mean_length": 398.21429443359375, + "completions/min_length": 287.0, + "entropy/max": 1.046875, + "entropy/mean": 0.4609375, + "entropy/min": 0.2373046875, + "epoch": 0.239, + "grad_norm": 1.3285319130495021, + "kl": 0.326171875, + "learning_rate": 1.747376569069381e-06, + "loss": 0.0033058568369597197, + "memory(GiB)": 146.12, + "reward": 1.411938190460205, + "reward_std": 0.4043456017971039, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27397623658180237, + "rewards/EvidenceHallucination/std": 0.37728753685951233, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.6717287302017212, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496562242507935, + "step": 239, + "train_speed(iter/s)": 0.018651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 463.0, + "completions/mean_length": 364.1428527832031, + "completions/min_length": 291.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.37890625, + "entropy/min": 0.26953125, + "epoch": 0.24, + "grad_norm": 1.1106519834910449, + "kl": 0.318359375, + "learning_rate": 1.7452644496757548e-06, + "loss": 0.00319494167342782, + "memory(GiB)": 146.12, + "reward": 1.3215675354003906, + "reward_std": 0.18073152005672455, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23566341400146484, + "rewards/EvidenceHallucination/std": 0.37131911516189575, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.4843665361404419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2744348645210266, + "rewards/VideoAccuracy/std": 0.408074289560318, + "step": 240, + "train_speed(iter/s)": 0.018586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 825.0, + "completions/mean_length": 446.66668701171875, + "completions/min_length": 295.0, + "entropy/max": 0.46875, + "entropy/mean": 0.3046875, + "entropy/min": 0.126953125, + "epoch": 0.241, + "grad_norm": 1.1616853808362892, + "kl": 0.2236328125, + "learning_rate": 1.743144825477394e-06, + "loss": 0.002262769965454936, + "memory(GiB)": 146.12, + "reward": 2.2303261756896973, + "reward_std": 0.12588879466056824, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.51683509349823, + "rewards/EvidenceHallucination/std": 0.3724346160888672, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.5768471360206604, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9269590973854065, + "rewards/VideoAccuracy/std": 0.41551220417022705, + "step": 241, + "train_speed(iter/s)": 0.018587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 652.0, + "completions/mean_length": 447.452392578125, + "completions/min_length": 297.0, + "entropy/max": 1.734375, + "entropy/mean": 0.5546875, + "entropy/min": 0.271484375, + "epoch": 0.242, + "grad_norm": 1.2606325767889839, + "kl": 0.314453125, + "learning_rate": 1.7410177178188917e-06, + "loss": 0.00319875031709671, + "memory(GiB)": 146.12, + "reward": 1.6765947341918945, + "reward_std": 0.11555620282888412, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40678247809410095, + "rewards/EvidenceHallucination/std": 0.35523679852485657, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.2653241157531738, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 242, + "train_speed(iter/s)": 0.018588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 555.0, + "completions/mean_length": 387.4761962890625, + "completions/min_length": 297.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.427734375, + "entropy/min": 0.30859375, + "epoch": 0.243, + "grad_norm": 1.146758509584447, + "kl": 0.326171875, + "learning_rate": 1.7388831481201976e-06, + "loss": 0.0032735601998865604, + "memory(GiB)": 146.12, + "reward": 1.2293654680252075, + "reward_std": 0.21915750205516815, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.1501440852880478, + "rewards/EvidenceHallucination/std": 0.3173399567604065, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8499504923820496, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2112414836883545, + "rewards/VideoAccuracy/std": 0.3915502429008484, + "step": 243, + "train_speed(iter/s)": 0.018593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 538.0, + "completions/mean_length": 389.6190490722656, + "completions/min_length": 266.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.369140625, + "entropy/min": 0.1630859375, + "epoch": 0.244, + "grad_norm": 1.500520513978552, + "kl": 0.298828125, + "learning_rate": 1.7367411378764047e-06, + "loss": 0.003024215577170253, + "memory(GiB)": 146.12, + "reward": 1.8303215503692627, + "reward_std": 0.09470437467098236, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44220417737960815, + "rewards/EvidenceHallucination/std": 0.4173884987831116, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.6172134280204773, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.6133091449737549, + "rewards/VideoAccuracy/std": 0.4946582615375519, + "step": 244, + "train_speed(iter/s)": 0.018619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 812.0, + "completions/mean_length": 507.0714416503906, + "completions/min_length": 241.0, + "entropy/max": 1.640625, + "entropy/mean": 0.51953125, + "entropy/min": 0.1669921875, + "epoch": 0.245, + "grad_norm": 0.9801385937878792, + "kl": 0.2333984375, + "learning_rate": 1.7345917086575331e-06, + "loss": 0.0023636885453015566, + "memory(GiB)": 146.12, + "reward": 1.4090077877044678, + "reward_std": 0.22807496786117554, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.17244532704353333, + "rewards/EvidenceHallucination/std": 0.3078490197658539, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.8323455452919006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.31261393427848816, + "rewards/VideoAccuracy/std": 0.3989967703819275, + "step": 245, + "train_speed(iter/s)": 0.018623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 540.0, + "completions/mean_length": 390.4761962890625, + "completions/min_length": 277.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.4453125, + "entropy/min": 0.291015625, + "epoch": 0.246, + "grad_norm": 1.1444176586472752, + "kl": 0.341796875, + "learning_rate": 1.7324348821083108e-06, + "loss": 0.0034365360625088215, + "memory(GiB)": 146.12, + "reward": 1.5113457441329956, + "reward_std": 0.17555874586105347, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33632904291152954, + "rewards/EvidenceHallucination/std": 0.41669946908950806, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.5823577642440796, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4440799057483673, + "rewards/VideoAccuracy/std": 0.4973483979701996, + "step": 246, + "train_speed(iter/s)": 0.018639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 527.0, + "completions/mean_length": 400.4761962890625, + "completions/min_length": 324.0, + "entropy/max": 0.609375, + "entropy/mean": 0.40234375, + "entropy/min": 0.267578125, + "epoch": 0.247, + "grad_norm": 1.3956294467056896, + "kl": 0.326171875, + "learning_rate": 1.7302706799479574e-06, + "loss": 0.0032703722827136517, + "memory(GiB)": 146.12, + "reward": 1.3936400413513184, + "reward_std": 0.27235424518585205, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21763722598552704, + "rewards/EvidenceHallucination/std": 0.3722839951515198, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.7034013867378235, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.28820788860321045, + "rewards/VideoAccuracy/std": 0.41962555050849915, + "step": 247, + "train_speed(iter/s)": 0.018645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 986.0, + "completions/mean_length": 482.90478515625, + "completions/min_length": 321.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.3984375, + "entropy/min": 0.166015625, + "epoch": 0.248, + "grad_norm": 1.2035411174051378, + "kl": 0.2216796875, + "learning_rate": 1.728099123969964e-06, + "loss": 0.002241044072434306, + "memory(GiB)": 146.12, + "reward": 1.8912005424499512, + "reward_std": 0.19596272706985474, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3262063264846802, + "rewards/EvidenceHallucination/std": 0.38895130157470703, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7860573530197144, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6973879337310791, + "rewards/VideoAccuracy/std": 0.445387601852417, + "step": 248, + "train_speed(iter/s)": 0.018639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 503.0, + "completions/mean_length": 404.5, + "completions/min_length": 288.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.48828125, + "entropy/min": 0.3359375, + "epoch": 0.249, + "grad_norm": 1.1842889183539216, + "kl": 0.322265625, + "learning_rate": 1.725920236041876e-06, + "loss": 0.003257274627685547, + "memory(GiB)": 146.12, + "reward": 1.3008848428726196, + "reward_std": 0.2445618361234665, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1949000358581543, + "rewards/EvidenceHallucination/std": 0.3388776481151581, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.6325473189353943, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.261904776096344, + "rewards/VideoAccuracy/std": 0.44500064849853516, + "step": 249, + "train_speed(iter/s)": 0.018639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 537.0, + "completions/mean_length": 397.5952453613281, + "completions/min_length": 316.0, + "entropy/max": 0.828125, + "entropy/mean": 0.42578125, + "entropy/min": 0.3125, + "epoch": 0.25, + "grad_norm": 1.3547388774578903, + "kl": 0.322265625, + "learning_rate": 1.72373403810507e-06, + "loss": 0.0032412242144346237, + "memory(GiB)": 146.12, + "reward": 1.5807161331176758, + "reward_std": 0.27251145243644714, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44168218970298767, + "rewards/EvidenceHallucination/std": 0.4541189670562744, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.5436787009239197, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4923795461654663, + "rewards/VideoAccuracy/std": 0.4577435851097107, + "step": 250, + "train_speed(iter/s)": 0.018646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 734.0, + "completions/mean_length": 438.40478515625, + "completions/min_length": 293.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.35546875, + "entropy/min": 0.166015625, + "epoch": 0.251, + "grad_norm": 1.307954720419532, + "kl": 0.2412109375, + "learning_rate": 1.7215405521745355e-06, + "loss": 0.002425679238513112, + "memory(GiB)": 146.12, + "reward": 2.0063116550445557, + "reward_std": 0.1874866485595703, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2503293454647064, + "rewards/EvidenceHallucination/std": 0.34678173065185547, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.5702659487724304, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7562455534934998, + "rewards/VideoAccuracy/std": 0.4492575228214264, + "step": 251, + "train_speed(iter/s)": 0.018647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 532.0, + "completions/mean_length": 421.452392578125, + "completions/min_length": 291.0, + "entropy/max": 1.421875, + "entropy/mean": 0.578125, + "entropy/min": 0.341796875, + "epoch": 0.252, + "grad_norm": 1.4208045132226126, + "kl": 0.322265625, + "learning_rate": 1.719339800338651e-06, + "loss": 0.0032400963827967644, + "memory(GiB)": 146.12, + "reward": 1.7803417444229126, + "reward_std": 0.2757423222064972, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4493274688720703, + "rewards/EvidenceHallucination/std": 0.407497763633728, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.5625766515731812, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6904761791229248, + "rewards/VideoAccuracy/std": 0.4679011404514313, + "step": 252, + "train_speed(iter/s)": 0.018641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 517.0, + "completions/mean_length": 394.16668701171875, + "completions/min_length": 273.0, + "entropy/max": 0.640625, + "entropy/mean": 0.451171875, + "entropy/min": 0.326171875, + "epoch": 0.253, + "grad_norm": 1.2929304180915258, + "kl": 0.333984375, + "learning_rate": 1.7171318047589635e-06, + "loss": 0.0033461390994489193, + "memory(GiB)": 146.12, + "reward": 1.3023897409439087, + "reward_std": 0.1982208788394928, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1807071566581726, + "rewards/EvidenceHallucination/std": 0.3331444561481476, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.6172134280204773, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2662482261657715, + "rewards/VideoAccuracy/std": 0.4432688355445862, + "step": 253, + "train_speed(iter/s)": 0.018639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 502.0, + "completions/mean_length": 377.6428527832031, + "completions/min_length": 266.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.37109375, + "entropy/min": 0.1640625, + "epoch": 0.254, + "grad_norm": 1.3430050273404348, + "kl": 0.322265625, + "learning_rate": 1.7149165876699635e-06, + "loss": 0.00322998920455575, + "memory(GiB)": 146.12, + "reward": 1.9686601161956787, + "reward_std": 0.15321964025497437, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3936600387096405, + "rewards/EvidenceHallucination/std": 0.41254332661628723, + "rewards/Evidence_Num_Record/mean": 3.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.4527628421783447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.756594717502594, + "rewards/VideoAccuracy/std": 0.4783211350440979, + "step": 254, + "train_speed(iter/s)": 0.018631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 841.0, + "completions/mean_length": 483.0476379394531, + "completions/min_length": 340.0, + "entropy/max": 1.984375, + "entropy/mean": 0.546875, + "entropy/min": 0.1767578125, + "epoch": 0.255, + "grad_norm": 1.203906055474538, + "kl": 0.2314453125, + "learning_rate": 1.7126941713788629e-06, + "loss": 0.00235724076628685, + "memory(GiB)": 146.12, + "reward": 1.7237396240234375, + "reward_std": 0.3287803530693054, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2043936848640442, + "rewards/EvidenceHallucination/std": 0.35046061873435974, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.5174088478088379, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6161943078041077, + "rewards/VideoAccuracy/std": 0.4976581335067749, + "step": 255, + "train_speed(iter/s)": 0.018634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 822.0, + "completions/mean_length": 393.9761962890625, + "completions/min_length": 288.0, + "entropy/max": 0.6953125, + "entropy/mean": 0.44140625, + "entropy/min": 0.201171875, + "epoch": 0.256, + "grad_norm": 1.6810269204709323, + "kl": 0.302734375, + "learning_rate": 1.7104645782653689e-06, + "loss": 0.00305356178432703, + "memory(GiB)": 146.12, + "reward": 1.8969032764434814, + "reward_std": 0.14163285493850708, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5417539477348328, + "rewards/EvidenceHallucination/std": 0.3971792161464691, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.6357524394989014, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.788552463054657, + "rewards/VideoAccuracy/std": 0.4101748764514923, + "step": 256, + "train_speed(iter/s)": 0.018631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 386.69049072265625, + "completions/min_length": 295.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.4375, + "entropy/min": 0.224609375, + "epoch": 0.257, + "grad_norm": 1.5110203668059161, + "kl": 0.322265625, + "learning_rate": 1.708227830781459e-06, + "loss": 0.0032882890664041042, + "memory(GiB)": 146.12, + "reward": 1.7894835472106934, + "reward_std": 0.3021777272224426, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3878067135810852, + "rewards/EvidenceHallucination/std": 0.3837956190109253, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.41739192605018616, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6452555656433105, + "rewards/VideoAccuracy/std": 0.49124613404273987, + "step": 257, + "train_speed(iter/s)": 0.01864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 570.0, + "completions/mean_length": 432.3095397949219, + "completions/min_length": 313.0, + "entropy/max": 1.3046875, + "entropy/mean": 0.4453125, + "entropy/min": 0.19140625, + "epoch": 0.258, + "grad_norm": 1.2616484212255992, + "kl": 0.2236328125, + "learning_rate": 1.7059839514511562e-06, + "loss": 0.0022375385742634535, + "memory(GiB)": 146.12, + "reward": 1.8460009098052979, + "reward_std": 0.15329289436340332, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2919936180114746, + "rewards/EvidenceHallucination/std": 0.37274080514907837, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.6325473189353943, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6542688012123108, + "rewards/VideoAccuracy/std": 0.48837876319885254, + "step": 258, + "train_speed(iter/s)": 0.018641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 535.0, + "completions/mean_length": 377.1428527832031, + "completions/min_length": 295.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.447265625, + "entropy/min": 0.357421875, + "epoch": 0.259, + "grad_norm": 1.4788345343195048, + "kl": 0.326171875, + "learning_rate": 1.7037329628703003e-06, + "loss": 0.0032539048697799444, + "memory(GiB)": 146.12, + "reward": 1.6552636623382568, + "reward_std": 0.24730446934700012, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41917556524276733, + "rewards/EvidenceHallucination/std": 0.41460180282592773, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.538850724697113, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5714285969734192, + "rewards/VideoAccuracy/std": 0.5008702874183655, + "step": 259, + "train_speed(iter/s)": 0.018645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 506.0, + "completions/mean_length": 352.5, + "completions/min_length": 250.0, + "entropy/max": 0.84765625, + "entropy/mean": 0.482421875, + "entropy/min": 0.3359375, + "epoch": 0.26, + "grad_norm": 1.5800491541258506, + "kl": 0.3203125, + "learning_rate": 1.7014748877063213e-06, + "loss": 0.003226345870643854, + "memory(GiB)": 146.12, + "reward": 1.6665571928024292, + "reward_std": 0.24422809481620789, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42166832089424133, + "rewards/EvidenceHallucination/std": 0.4300963580608368, + "rewards/Evidence_Num_Record/mean": 2.952380895614624, + "rewards/Evidence_Num_Record/std": 0.6228330731391907, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5822235941886902, + "rewards/VideoAccuracy/std": 0.46341386437416077, + "step": 260, + "train_speed(iter/s)": 0.01865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 680.0, + "completions/mean_length": 440.23809814453125, + "completions/min_length": 329.0, + "entropy/max": 0.482421875, + "entropy/mean": 0.35546875, + "entropy/min": 0.1806640625, + "epoch": 0.261, + "grad_norm": 1.286282864683331, + "kl": 0.234375, + "learning_rate": 1.6992097486980106e-06, + "loss": 0.0023602007422596216, + "memory(GiB)": 146.12, + "reward": 2.2474725246429443, + "reward_std": 0.10352709889411926, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6212478280067444, + "rewards/EvidenceHallucination/std": 0.37116003036499023, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.6228330731391907, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9047619104385376, + "rewards/HonestTime/std": 0.297101765871048, + "rewards/VideoAccuracy/mean": 0.9422704577445984, + "rewards/VideoAccuracy/std": 0.35407426953315735, + "step": 261, + "train_speed(iter/s)": 0.018637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 806.0, + "completions/mean_length": 386.452392578125, + "completions/min_length": 230.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.5078125, + "entropy/min": 0.259765625, + "epoch": 0.262, + "grad_norm": 1.3120554102638122, + "kl": 0.330078125, + "learning_rate": 1.6969375686552936e-06, + "loss": 0.003335272893309593, + "memory(GiB)": 146.12, + "reward": 1.4105677604675293, + "reward_std": 0.2667071521282196, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.26712438464164734, + "rewards/EvidenceHallucination/std": 0.37364739179611206, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.7696326971054077, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3571428656578064, + "rewards/VideoAccuracy/std": 0.48496562242507935, + "step": 262, + "train_speed(iter/s)": 0.018627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 746.0, + "completions/mean_length": 361.3333435058594, + "completions/min_length": 248.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.4296875, + "entropy/min": 0.2080078125, + "epoch": 0.263, + "grad_norm": 1.3998149149303465, + "kl": 0.333984375, + "learning_rate": 1.6946583704589972e-06, + "loss": 0.003358669113367796, + "memory(GiB)": 146.12, + "reward": 1.6507415771484375, + "reward_std": 0.16618609428405762, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4359738528728485, + "rewards/EvidenceHallucination/std": 0.40811988711357117, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.41739192605018616, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5635467767715454, + "rewards/VideoAccuracy/std": 0.4699776768684387, + "step": 263, + "train_speed(iter/s)": 0.01863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 479.5, + "completions/min_length": 310.0, + "entropy/max": 1.1015625, + "entropy/mean": 0.3828125, + "entropy/min": 0.05859375, + "epoch": 0.264, + "grad_norm": 1.1863465610849044, + "kl": 0.275390625, + "learning_rate": 1.6923721770606226e-06, + "loss": 0.002848361385986209, + "memory(GiB)": 146.12, + "reward": 2.009645462036133, + "reward_std": 0.15725143253803253, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5580154061317444, + "rewards/EvidenceHallucination/std": 0.388560950756073, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.4915074408054352, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.776613712310791, + "rewards/VideoAccuracy/std": 0.5218595266342163, + "step": 264, + "train_speed(iter/s)": 0.01859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 704.0, + "completions/mean_length": 439.0714416503906, + "completions/min_length": 212.0, + "entropy/max": 1.171875, + "entropy/mean": 0.451171875, + "entropy/min": 0.14453125, + "epoch": 0.265, + "grad_norm": 0.7861337355114407, + "kl": 0.251953125, + "learning_rate": 1.690079011482112e-06, + "loss": 0.0025292334612458944, + "memory(GiB)": 146.12, + "reward": 1.294556736946106, + "reward_std": 0.17236953973770142, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.043062519282102585, + "rewards/EvidenceHallucination/std": 0.15979067981243134, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.8082759976387024, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.22403958439826965, + "rewards/VideoAccuracy/std": 0.37679558992385864, + "step": 265, + "train_speed(iter/s)": 0.018598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 462.0, + "completions/mean_length": 352.76190185546875, + "completions/min_length": 279.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.42578125, + "entropy/min": 0.2578125, + "epoch": 0.266, + "grad_norm": 1.2774521106470518, + "kl": 0.322265625, + "learning_rate": 1.687778896815617e-06, + "loss": 0.0032268771901726723, + "memory(GiB)": 146.12, + "reward": 1.636015772819519, + "reward_std": 0.07334038615226746, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41423699259757996, + "rewards/EvidenceHallucination/std": 0.4070185422897339, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.3541688024997711, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5531682372093201, + "rewards/VideoAccuracy/std": 0.48006635904312134, + "step": 266, + "train_speed(iter/s)": 0.018604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 570.0, + "completions/mean_length": 374.76190185546875, + "completions/min_length": 237.0, + "entropy/max": 0.96484375, + "entropy/mean": 0.435546875, + "entropy/min": 0.27734375, + "epoch": 0.267, + "grad_norm": 1.1790047383894677, + "kl": 0.330078125, + "learning_rate": 1.6854718562232666e-06, + "loss": 0.0033270521089434624, + "memory(GiB)": 146.12, + "reward": 1.3987579345703125, + "reward_std": 0.21581435203552246, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16781625151634216, + "rewards/EvidenceHallucination/std": 0.3076663911342621, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.48973196744918823, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.2985280156135559, + "rewards/VideoAccuracy/std": 0.49148377776145935, + "step": 267, + "train_speed(iter/s)": 0.018616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 572.0, + "completions/mean_length": 423.0476379394531, + "completions/min_length": 294.0, + "entropy/max": 0.88671875, + "entropy/mean": 0.3828125, + "entropy/min": 0.1328125, + "epoch": 0.268, + "grad_norm": 1.2475442952274358, + "kl": 0.2109375, + "learning_rate": 1.6831579129369345e-06, + "loss": 0.0021230760030448437, + "memory(GiB)": 146.12, + "reward": 1.9836636781692505, + "reward_std": 0.3484979569911957, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4690280258655548, + "rewards/EvidenceHallucination/std": 0.3501991033554077, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.525759220123291, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074110031128, + "rewards/VideoAccuracy/mean": 0.7660484313964844, + "rewards/VideoAccuracy/std": 0.45203521847724915, + "step": 268, + "train_speed(iter/s)": 0.018613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 571.0, + "completions/mean_length": 350.952392578125, + "completions/min_length": 217.0, + "entropy/max": 0.75, + "entropy/mean": 0.4453125, + "entropy/min": 0.2734375, + "epoch": 0.269, + "grad_norm": 1.6652848945868766, + "kl": 0.349609375, + "learning_rate": 1.6808370902580034e-06, + "loss": 0.0035270198713988066, + "memory(GiB)": 146.12, + "reward": 1.8315030336380005, + "reward_std": 0.20914201438426971, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5860862135887146, + "rewards/EvidenceHallucination/std": 0.4115218222141266, + "rewards/Evidence_Num_Record/mean": 3.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.4629100263118744, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6809524297714233, + "rewards/VideoAccuracy/std": 0.46551209688186646, + "step": 269, + "train_speed(iter/s)": 0.018597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 786.0, + "completions/mean_length": 398.5714416503906, + "completions/min_length": 260.0, + "entropy/max": 0.7109375, + "entropy/mean": 0.431640625, + "entropy/min": 0.2216796875, + "epoch": 0.27, + "grad_norm": 1.407995494388005, + "kl": 0.283203125, + "learning_rate": 1.6785094115571322e-06, + "loss": 0.0028950113337486982, + "memory(GiB)": 146.12, + "reward": 1.444591999053955, + "reward_std": 0.22145162522792816, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3269577622413635, + "rewards/EvidenceHallucination/std": 0.42625492811203003, + "rewards/Evidence_Num_Record/mean": 3.047619104385376, + "rewards/Evidence_Num_Record/std": 0.5388506650924683, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.37920039892196655, + "rewards/VideoAccuracy/std": 0.42671406269073486, + "step": 270, + "train_speed(iter/s)": 0.018595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/mean_length": 418.73809814453125, + "completions/min_length": 291.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.345703125, + "entropy/min": 0.1669921875, + "epoch": 0.271, + "grad_norm": 1.2815953898124415, + "kl": 0.2236328125, + "learning_rate": 1.6761749002740193e-06, + "loss": 0.002252672566100955, + "memory(GiB)": 146.12, + "reward": 2.0878782272338867, + "reward_std": 0.1658097207546234, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4907301962375641, + "rewards/EvidenceHallucination/std": 0.38563522696495056, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.7054623961448669, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430335700511932, + "rewards/VideoAccuracy/mean": 0.7944942116737366, + "rewards/VideoAccuracy/std": 0.4951496422290802, + "step": 271, + "train_speed(iter/s)": 0.018605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/mean_length": 355.26190185546875, + "completions/min_length": 232.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.474609375, + "entropy/min": 0.265625, + "epoch": 0.272, + "grad_norm": 1.4149181771197945, + "kl": 0.322265625, + "learning_rate": 1.6738335799171678e-06, + "loss": 0.003463061060756445, + "memory(GiB)": 146.12, + "reward": 1.6014920473098755, + "reward_std": 0.1528625339269638, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3407929837703705, + "rewards/EvidenceHallucination/std": 0.3648604154586792, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.594203531742096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5, + "rewards/VideoAccuracy/std": 0.5060608386993408, + "step": 272, + "train_speed(iter/s)": 0.018579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 432.0, + "completions/mean_length": 317.0714416503906, + "completions/min_length": 218.0, + "entropy/max": 0.515625, + "entropy/mean": 0.44140625, + "entropy/min": 0.30078125, + "epoch": 0.273, + "grad_norm": 1.5898356743054494, + "kl": 0.341796875, + "learning_rate": 1.6714854740636476e-06, + "loss": 0.0034236079081892967, + "memory(GiB)": 146.12, + "reward": 1.6726698875427246, + "reward_std": 0.12765172123908997, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4024078845977783, + "rewards/EvidenceHallucination/std": 0.38358187675476074, + "rewards/Evidence_Num_Record/mean": 3.0, + "rewards/Evidence_Num_Record/std": 0.38254600763320923, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5921885371208191, + "rewards/VideoAccuracy/std": 0.4514201283454895, + "step": 273, + "train_speed(iter/s)": 0.018532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 568.0, + "completions/mean_length": 372.6190490722656, + "completions/min_length": 307.0, + "entropy/max": 0.671875, + "entropy/mean": 0.390625, + "entropy/min": 0.232421875, + "epoch": 0.274, + "grad_norm": 1.3714403702806075, + "kl": 0.294921875, + "learning_rate": 1.669130606358858e-06, + "loss": 0.002969046588987112, + "memory(GiB)": 146.12, + "reward": 1.9991328716278076, + "reward_std": 0.25838133692741394, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4453437030315399, + "rewards/EvidenceHallucination/std": 0.437023788690567, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.3541688024997711, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.776730477809906, + "rewards/VideoAccuracy/std": 0.5350348353385925, + "step": 274, + "train_speed(iter/s)": 0.018539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/mean_length": 371.4285888671875, + "completions/min_length": 214.0, + "entropy/max": 0.890625, + "entropy/mean": 0.4296875, + "entropy/min": 0.1552734375, + "epoch": 0.275, + "grad_norm": 1.4398381008790575, + "kl": 0.263671875, + "learning_rate": 1.6667690005162916e-06, + "loss": 0.002658488228917122, + "memory(GiB)": 146.12, + "reward": 1.5924758911132812, + "reward_std": 0.27572792768478394, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3190845847129822, + "rewards/EvidenceHallucination/std": 0.40540894865989685, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.8540400862693787, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.4381828308105469, + "rewards/VideoAccuracy/std": 0.4048685133457184, + "step": 275, + "train_speed(iter/s)": 0.018546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 802.0, + "completions/mean_length": 361.2857360839844, + "completions/min_length": 232.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.408203125, + "entropy/min": 0.23046875, + "epoch": 0.276, + "grad_norm": 1.4123614760517333, + "kl": 0.291015625, + "learning_rate": 1.6644006803172922e-06, + "loss": 0.0029547642916440964, + "memory(GiB)": 146.12, + "reward": 1.5313931703567505, + "reward_std": 0.2745862901210785, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3110722601413727, + "rewards/EvidenceHallucination/std": 0.3488800823688507, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.800696611404419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.46917879581451416, + "rewards/VideoAccuracy/std": 0.49849677085876465, + "step": 276, + "train_speed(iter/s)": 0.018543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 550.0, + "completions/mean_length": 369.8333435058594, + "completions/min_length": 233.0, + "entropy/max": 0.75, + "entropy/mean": 0.443359375, + "entropy/min": 0.2216796875, + "epoch": 0.277, + "grad_norm": 1.526393494077956, + "kl": 0.310546875, + "learning_rate": 1.6620256696108185e-06, + "loss": 0.0031316380482167006, + "memory(GiB)": 146.12, + "reward": 1.6020976305007935, + "reward_std": 0.18422286212444305, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33153560757637024, + "rewards/EvidenceHallucination/std": 0.41367554664611816, + "rewards/Evidence_Num_Record/mean": 2.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.5174089074134827, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4691237509250641, + "rewards/VideoAccuracy/std": 0.5692328810691833, + "step": 277, + "train_speed(iter/s)": 0.018558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 555.0, + "completions/mean_length": 370.69049072265625, + "completions/min_length": 203.0, + "entropy/max": 0.859375, + "entropy/mean": 0.384765625, + "entropy/min": 0.1611328125, + "epoch": 0.278, + "grad_norm": 1.404825204665656, + "kl": 0.2236328125, + "learning_rate": 1.6596439923132015e-06, + "loss": 0.0022652121260762215, + "memory(GiB)": 146.12, + "reward": 2.0874264240264893, + "reward_std": 0.14774879813194275, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4306017756462097, + "rewards/EvidenceHallucination/std": 0.4004756212234497, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.5823577642440796, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8679726123809814, + "rewards/VideoAccuracy/std": 0.38059890270233154, + "step": 278, + "train_speed(iter/s)": 0.018562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 660.0, + "completions/mean_length": 355.952392578125, + "completions/min_length": 273.0, + "entropy/max": 1.3828125, + "entropy/mean": 0.5078125, + "entropy/min": 0.341796875, + "epoch": 0.279, + "grad_norm": 1.5862269334906465, + "kl": 0.31640625, + "learning_rate": 1.6572556724079054e-06, + "loss": 0.003191668540239334, + "memory(GiB)": 146.12, + "reward": 1.796518087387085, + "reward_std": 0.189345121383667, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5061548948287964, + "rewards/EvidenceHallucination/std": 0.3945556581020355, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.8035924434661865, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6619536876678467, + "rewards/VideoAccuracy/std": 0.45757246017456055, + "step": 279, + "train_speed(iter/s)": 0.01855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 741.0, + "completions/mean_length": 381.952392578125, + "completions/min_length": 231.0, + "entropy/max": 0.77734375, + "entropy/mean": 0.474609375, + "entropy/min": 0.27734375, + "epoch": 0.28, + "grad_norm": 1.3224319250136158, + "kl": 0.296875, + "learning_rate": 1.6548607339452852e-06, + "loss": 0.003031244268640876, + "memory(GiB)": 146.12, + "reward": 1.2428820133209229, + "reward_std": 0.2718929350376129, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1785127818584442, + "rewards/EvidenceHallucination/std": 0.35546743869781494, + "rewards/Evidence_Num_Record/mean": 3.047619104385376, + "rewards/Evidence_Num_Record/std": 0.5388506650924683, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.20717935264110565, + "rewards/VideoAccuracy/std": 0.38427430391311646, + "step": 280, + "train_speed(iter/s)": 0.018555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 634.0, + "completions/mean_length": 415.9285888671875, + "completions/min_length": 294.0, + "entropy/max": 0.5625, + "entropy/mean": 0.34765625, + "entropy/min": 0.126953125, + "epoch": 0.281, + "grad_norm": 1.3244084971751433, + "kl": 0.2138671875, + "learning_rate": 1.6524592010423442e-06, + "loss": 0.0021475343964993954, + "memory(GiB)": 146.12, + "reward": 2.2283387184143066, + "reward_std": 0.22591161727905273, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5387340188026428, + "rewards/EvidenceHallucination/std": 0.3905985355377197, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.5868279337882996, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9253537654876709, + "rewards/VideoAccuracy/std": 0.3087855279445648, + "step": 281, + "train_speed(iter/s)": 0.018559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 481.0, + "completions/mean_length": 349.4047546386719, + "completions/min_length": 229.0, + "entropy/max": 0.8515625, + "entropy/mean": 0.53515625, + "entropy/min": 0.291015625, + "epoch": 0.282, + "grad_norm": 1.2315899887328492, + "kl": 0.326171875, + "learning_rate": 1.6500510978824923e-06, + "loss": 0.0032892085146158934, + "memory(GiB)": 146.12, + "reward": 1.4003338813781738, + "reward_std": 0.10361535847187042, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2764706611633301, + "rewards/EvidenceHallucination/std": 0.3708851933479309, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.6228330731391907, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3117063641548157, + "rewards/VideoAccuracy/std": 0.42241451144218445, + "step": 282, + "train_speed(iter/s)": 0.018554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 507.0, + "completions/mean_length": 381.66668701171875, + "completions/min_length": 282.0, + "entropy/max": 0.5625, + "entropy/mean": 0.44921875, + "entropy/min": 0.328125, + "epoch": 0.283, + "grad_norm": 1.0584350435835908, + "kl": 0.302734375, + "learning_rate": 1.6476364487153022e-06, + "loss": 0.003029999090358615, + "memory(GiB)": 146.12, + "reward": 1.4891345500946045, + "reward_std": 0.05869223177433014, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31638839840888977, + "rewards/EvidenceHallucination/std": 0.3799745440483093, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.49679580330848694, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4258568286895752, + "rewards/VideoAccuracy/std": 0.46307775378227234, + "step": 283, + "train_speed(iter/s)": 0.018545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 498.0, + "completions/mean_length": 387.4761962890625, + "completions/min_length": 293.0, + "entropy/max": 0.4921875, + "entropy/mean": 0.365234375, + "entropy/min": 0.1416015625, + "epoch": 0.284, + "grad_norm": 1.2350425664714775, + "kl": 0.298828125, + "learning_rate": 1.6452152778562628e-06, + "loss": 0.0029805107042193413, + "memory(GiB)": 146.12, + "reward": 1.9165128469467163, + "reward_std": 0.19557994604110718, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5106355547904968, + "rewards/EvidenceHallucination/std": 0.43393734097480774, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.5008703470230103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6810523271560669, + "rewards/VideoAccuracy/std": 0.5946457386016846, + "step": 284, + "train_speed(iter/s)": 0.018557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 618.0, + "completions/mean_length": 406.19049072265625, + "completions/min_length": 255.0, + "entropy/max": 1.5390625, + "entropy/mean": 0.515625, + "entropy/min": 0.1884765625, + "epoch": 0.285, + "grad_norm": 1.4615945800675014, + "kl": 0.23828125, + "learning_rate": 1.6427876096865393e-06, + "loss": 0.002394037786871195, + "memory(GiB)": 146.12, + "reward": 2.0483367443084717, + "reward_std": 0.26414060592651367, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6125563383102417, + "rewards/EvidenceHallucination/std": 0.34441256523132324, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.7485952973365784, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8258254528045654, + "rewards/VideoAccuracy/std": 0.3920741677284241, + "step": 285, + "train_speed(iter/s)": 0.018574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 466.0, + "completions/mean_length": 338.73809814453125, + "completions/min_length": 213.0, + "entropy/max": 0.515625, + "entropy/mean": 0.419921875, + "entropy/min": 0.298828125, + "epoch": 0.286, + "grad_norm": 0.8613073507067102, + "kl": 0.302734375, + "learning_rate": 1.6403534686527223e-06, + "loss": 0.003025809768587351, + "memory(GiB)": 146.12, + "reward": 1.3098571300506592, + "reward_std": 0.0890624076128006, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18180575966835022, + "rewards/EvidenceHallucination/std": 0.32224151492118835, + "rewards/Evidence_Num_Record/mean": 3.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.4629100561141968, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2734958827495575, + "rewards/VideoAccuracy/std": 0.43439802527427673, + "step": 286, + "train_speed(iter/s)": 0.018531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 564.0, + "completions/mean_length": 377.1428527832031, + "completions/min_length": 270.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.42578125, + "entropy/min": 0.255859375, + "epoch": 0.287, + "grad_norm": 1.6139082440394137, + "kl": 0.322265625, + "learning_rate": 1.6379128792665852e-06, + "loss": 0.0032466964330524206, + "memory(GiB)": 146.12, + "reward": 1.5950889587402344, + "reward_std": 0.3116930425167084, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3740018606185913, + "rewards/EvidenceHallucination/std": 0.439867228269577, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.532345175743103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.45362186431884766, + "rewards/VideoAccuracy/std": 0.4430921971797943, + "step": 287, + "train_speed(iter/s)": 0.018537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 579.0, + "completions/mean_length": 421.4047546386719, + "completions/min_length": 306.0, + "entropy/max": 0.76953125, + "entropy/mean": 0.34765625, + "entropy/min": 0.09375, + "epoch": 0.288, + "grad_norm": 1.1985359286429647, + "kl": 0.2119140625, + "learning_rate": 1.6354658661048361e-06, + "loss": 0.0021414700895547867, + "memory(GiB)": 146.12, + "reward": 2.0947811603546143, + "reward_std": 0.245386004447937, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5193066596984863, + "rewards/EvidenceHallucination/std": 0.3565949499607086, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.5037605166435242, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8575864434242249, + "rewards/VideoAccuracy/std": 0.4079618752002716, + "step": 288, + "train_speed(iter/s)": 0.018533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 471.0, + "completions/mean_length": 367.0952453613281, + "completions/min_length": 283.0, + "entropy/max": 0.609375, + "entropy/mean": 0.443359375, + "entropy/min": 0.2734375, + "epoch": 0.289, + "grad_norm": 1.4710639856031327, + "kl": 0.298828125, + "learning_rate": 1.6330124538088703e-06, + "loss": 0.0030165519565343857, + "memory(GiB)": 146.12, + "reward": 1.7694091796875, + "reward_std": 0.21903131902217865, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.47865933179855347, + "rewards/EvidenceHallucination/std": 0.3591003715991974, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.5500501394271851, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6403442025184631, + "rewards/VideoAccuracy/std": 0.44497716426849365, + "step": 289, + "train_speed(iter/s)": 0.018538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 610.0, + "completions/mean_length": 398.2857360839844, + "completions/min_length": 281.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.435546875, + "entropy/min": 0.2578125, + "epoch": 0.29, + "grad_norm": 1.51714136619265, + "kl": 0.287109375, + "learning_rate": 1.6305526670845225e-06, + "loss": 0.0028732414357364178, + "memory(GiB)": 146.12, + "reward": 1.5587888956069946, + "reward_std": 0.276694655418396, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46286067366600037, + "rewards/EvidenceHallucination/std": 0.4573749303817749, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.4843665361404419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.46621668338775635, + "rewards/VideoAccuracy/std": 0.4573647677898407, + "step": 290, + "train_speed(iter/s)": 0.018533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 544.0, + "completions/mean_length": 420.21429443359375, + "completions/min_length": 290.0, + "entropy/max": 0.4921875, + "entropy/mean": 0.29296875, + "entropy/min": 0.1123046875, + "epoch": 0.291, + "grad_norm": 1.3266734858103288, + "kl": 0.2099609375, + "learning_rate": 1.6280865307018174e-06, + "loss": 0.0020992772188037634, + "memory(GiB)": 146.12, + "reward": 2.392334461212158, + "reward_std": 0.15745367109775543, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7055816054344177, + "rewards/EvidenceHallucination/std": 0.21519999206066132, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.49150747060775757, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0512181520462036, + "rewards/VideoAccuracy/std": 0.19238141179084778, + "step": 291, + "train_speed(iter/s)": 0.018534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 468.0, + "completions/mean_length": 342.5, + "completions/min_length": 239.0, + "entropy/max": 1.640625, + "entropy/mean": 0.55859375, + "entropy/min": 0.3046875, + "epoch": 0.292, + "grad_norm": 1.497831242594547, + "kl": 0.294921875, + "learning_rate": 1.6256140694947215e-06, + "loss": 0.002976033603772521, + "memory(GiB)": 146.12, + "reward": 1.3734747171401978, + "reward_std": 0.29520106315612793, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19412264227867126, + "rewards/EvidenceHallucination/std": 0.33884894847869873, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.7543909549713135, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.30607864260673523, + "rewards/VideoAccuracy/std": 0.4278899133205414, + "step": 292, + "train_speed(iter/s)": 0.018532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 484.0, + "completions/mean_length": 350.3809509277344, + "completions/min_length": 243.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.419921875, + "entropy/min": 0.326171875, + "epoch": 0.293, + "grad_norm": 1.0298131574192986, + "kl": 0.298828125, + "learning_rate": 1.623135308360891e-06, + "loss": 0.0029970314353704453, + "memory(GiB)": 146.12, + "reward": 1.3211218118667603, + "reward_std": 0.23074208199977875, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21663756668567657, + "rewards/EvidenceHallucination/std": 0.36890122294425964, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.4371005594730377, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.2777942419052124, + "rewards/VideoAccuracy/std": 0.39285850524902344, + "step": 293, + "train_speed(iter/s)": 0.018541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 474.0, + "completions/mean_length": 365.6190490722656, + "completions/min_length": 305.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.390625, + "entropy/min": 0.1396484375, + "epoch": 0.294, + "grad_norm": 1.4985764630374498, + "kl": 0.271484375, + "learning_rate": 1.6206502722614236e-06, + "loss": 0.002712035086005926, + "memory(GiB)": 146.12, + "reward": 2.1152074337005615, + "reward_std": 0.26597416400909424, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5570864081382751, + "rewards/EvidenceHallucination/std": 0.39343953132629395, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.39743661880493164, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.8752186298370361, + "rewards/VideoAccuracy/std": 0.43338462710380554, + "step": 294, + "train_speed(iter/s)": 0.01855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 480.0, + "completions/mean_length": 361.16668701171875, + "completions/min_length": 221.0, + "entropy/max": 1.09375, + "entropy/mean": 0.45703125, + "entropy/min": 0.11767578125, + "epoch": 0.295, + "grad_norm": 1.525870474835172, + "kl": 0.2333984375, + "learning_rate": 1.6181589862206052e-06, + "loss": 0.0023380776401609182, + "memory(GiB)": 146.12, + "reward": 2.0736372470855713, + "reward_std": 0.33692988753318787, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6441951990127563, + "rewards/EvidenceHallucination/std": 0.32928863167762756, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.7635724544525146, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8447983264923096, + "rewards/VideoAccuracy/std": 0.40280503034591675, + "step": 295, + "train_speed(iter/s)": 0.018572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 541.0, + "completions/mean_length": 349.28570556640625, + "completions/min_length": 251.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.392578125, + "entropy/min": 0.224609375, + "epoch": 0.296, + "grad_norm": 1.6048282162774623, + "kl": 0.328125, + "learning_rate": 1.615661475325658e-06, + "loss": 0.00313993776217103, + "memory(GiB)": 146.12, + "reward": 1.466852068901062, + "reward_std": 0.3431503474712372, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3205814063549042, + "rewards/EvidenceHallucination/std": 0.4040875732898712, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.48496562242507935, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.40273576974868774, + "rewards/VideoAccuracy/std": 0.4944770336151123, + "step": 296, + "train_speed(iter/s)": 0.018534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 457.0, + "completions/mean_length": 343.8809509277344, + "completions/min_length": 227.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.396484375, + "entropy/min": 0.26171875, + "epoch": 0.297, + "grad_norm": 1.5646386644793522, + "kl": 0.294921875, + "learning_rate": 1.61315776472649e-06, + "loss": 0.0029657031409442425, + "memory(GiB)": 146.12, + "reward": 1.6441454887390137, + "reward_std": 0.24957570433616638, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.337864488363266, + "rewards/EvidenceHallucination/std": 0.39413684606552124, + "rewards/Evidence_Num_Record/mean": 2.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.46790117025375366, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5099059343338013, + "rewards/VideoAccuracy/std": 0.5179862380027771, + "step": 297, + "train_speed(iter/s)": 0.018496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 659.0, + "completions/mean_length": 439.21429443359375, + "completions/min_length": 325.0, + "entropy/max": 1.2265625, + "entropy/mean": 0.3828125, + "entropy/min": 0.1337890625, + "epoch": 0.298, + "grad_norm": 1.2283899643427931, + "kl": 0.19140625, + "learning_rate": 1.6106478796354382e-06, + "loss": 0.0019259240943938494, + "memory(GiB)": 146.12, + "reward": 1.9087477922439575, + "reward_std": 0.12891468405723572, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5294734239578247, + "rewards/EvidenceHallucination/std": 0.3857249617576599, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.8249872326850891, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6695197224617004, + "rewards/VideoAccuracy/std": 0.4106285572052002, + "step": 298, + "train_speed(iter/s)": 0.018501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 627.0, + "completions/mean_length": 359.26190185546875, + "completions/min_length": 199.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.4296875, + "entropy/min": 0.283203125, + "epoch": 0.299, + "grad_norm": 1.4108113774763722, + "kl": 0.263671875, + "learning_rate": 1.608131845327018e-06, + "loss": 0.0026504751294851303, + "memory(GiB)": 146.12, + "reward": 1.3847062587738037, + "reward_std": 0.21600672602653503, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2568646967411041, + "rewards/EvidenceHallucination/std": 0.4156530499458313, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 1.173903226852417, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4500652849674225, + "step": 299, + "train_speed(iter/s)": 0.018494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 519.0, + "completions/mean_length": 356.23809814453125, + "completions/min_length": 259.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.404296875, + "entropy/min": 0.27734375, + "epoch": 0.3, + "grad_norm": 1.4286158463874457, + "kl": 0.251953125, + "learning_rate": 1.6056096871376666e-06, + "loss": 0.0025103879161179066, + "memory(GiB)": 146.12, + "reward": 1.280792236328125, + "reward_std": 0.29425978660583496, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1964530497789383, + "rewards/EvidenceHallucination/std": 0.3479928970336914, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 1.060591697692871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.24150165915489197, + "rewards/VideoAccuracy/std": 0.33755213022232056, + "step": 300, + "train_speed(iter/s)": 0.018496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 559.0, + "completions/mean_length": 384.6190490722656, + "completions/min_length": 190.0, + "entropy/max": 0.65625, + "entropy/mean": 0.3203125, + "entropy/min": 0.1259765625, + "epoch": 0.301, + "grad_norm": 1.3581633235784614, + "kl": 0.2041015625, + "learning_rate": 1.6030814304654894e-06, + "loss": 0.0020680841989815235, + "memory(GiB)": 146.12, + "reward": 2.094529390335083, + "reward_std": 0.1851232498884201, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46943730115890503, + "rewards/EvidenceHallucination/std": 0.37510302662849426, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.6325473189353943, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8006417751312256, + "rewards/VideoAccuracy/std": 0.34724944829940796, + "step": 301, + "train_speed(iter/s)": 0.01845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 455.0, + "completions/mean_length": 324.5, + "completions/min_length": 212.0, + "entropy/max": 1.8515625, + "entropy/mean": 0.55078125, + "entropy/min": 0.296875, + "epoch": 0.302, + "grad_norm": 1.6106867892162855, + "kl": 0.287109375, + "learning_rate": 1.600547100770003e-06, + "loss": 0.0028827935457229614, + "memory(GiB)": 146.12, + "reward": 1.6848338842391968, + "reward_std": 0.23833847045898438, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3699362576007843, + "rewards/EvidenceHallucination/std": 0.36776798963546753, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.6357524991035461, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5775132775306702, + "rewards/VideoAccuracy/std": 0.4651445746421814, + "step": 302, + "train_speed(iter/s)": 0.018461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1195.0, + "completions/mean_length": 378.3571472167969, + "completions/min_length": 257.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.365234375, + "entropy/min": 0.203125, + "epoch": 0.303, + "grad_norm": 1.1537914865743932, + "kl": 0.2421875, + "learning_rate": 1.598006723571879e-06, + "loss": 0.0024713780730962753, + "memory(GiB)": 146.12, + "reward": 1.2205116748809814, + "reward_std": 0.29441866278648376, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18889151513576508, + "rewards/EvidenceHallucination/std": 0.3691125810146332, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 1.8759434223175049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.18273337185382843, + "rewards/VideoAccuracy/std": 0.349786639213562, + "step": 303, + "train_speed(iter/s)": 0.018444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 545.0, + "completions/mean_length": 385.452392578125, + "completions/min_length": 317.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.384765625, + "entropy/min": 0.13671875, + "epoch": 0.304, + "grad_norm": 1.310105879510366, + "kl": 0.234375, + "learning_rate": 1.595460324452688e-06, + "loss": 0.002331523923203349, + "memory(GiB)": 146.12, + "reward": 1.746359944343567, + "reward_std": 0.22891227900981903, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2776835560798645, + "rewards/EvidenceHallucination/std": 0.3692076802253723, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.6325473189353943, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.562251627445221, + "rewards/VideoAccuracy/std": 0.5393766164779663, + "step": 304, + "train_speed(iter/s)": 0.018453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 693.0, + "completions/mean_length": 394.5952453613281, + "completions/min_length": 187.0, + "entropy/max": 1.3359375, + "entropy/mean": 0.546875, + "entropy/min": 0.1357421875, + "epoch": 0.305, + "grad_norm": 1.4417766793150282, + "kl": 0.2099609375, + "learning_rate": 1.5929079290546405e-06, + "loss": 0.0021169825922697783, + "memory(GiB)": 146.12, + "reward": 1.793710470199585, + "reward_std": 0.28301554918289185, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30675727128982544, + "rewards/EvidenceHallucination/std": 0.36037933826446533, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 1.267845869064331, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.632358968257904, + "rewards/VideoAccuracy/std": 0.50540691614151, + "step": 305, + "train_speed(iter/s)": 0.018468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 491.0, + "completions/mean_length": 339.69049072265625, + "completions/min_length": 240.0, + "entropy/max": 0.53125, + "entropy/mean": 0.421875, + "entropy/min": 0.2890625, + "epoch": 0.306, + "grad_norm": 1.0434674554954733, + "kl": 0.25, + "learning_rate": 1.5903495630803298e-06, + "loss": 0.002527870936319232, + "memory(GiB)": 146.12, + "reward": 1.4312735795974731, + "reward_std": 0.0730072408914566, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31148022413253784, + "rewards/EvidenceHallucination/std": 0.4281456172466278, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.7071067094802856, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.36897751688957214, + "rewards/VideoAccuracy/std": 0.47729581594467163, + "step": 306, + "train_speed(iter/s)": 0.018442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 521.0, + "completions/mean_length": 374.69049072265625, + "completions/min_length": 282.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.38671875, + "entropy/min": 0.291015625, + "epoch": 0.307, + "grad_norm": 1.3614605604782124, + "kl": 0.26953125, + "learning_rate": 1.587785252292473e-06, + "loss": 0.0027160055469721556, + "memory(GiB)": 146.12, + "reward": 1.530571699142456, + "reward_std": 0.07783089578151703, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.25006505846977234, + "rewards/EvidenceHallucination/std": 0.37279269099235535, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.5436787009239197, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.41865381598472595, + "rewards/VideoAccuracy/std": 0.43165814876556396, + "step": 307, + "train_speed(iter/s)": 0.018454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 822.0, + "completions/mean_length": 439.71429443359375, + "completions/min_length": 288.0, + "entropy/max": 1.8828125, + "entropy/mean": 0.484375, + "entropy/min": 0.125, + "epoch": 0.308, + "grad_norm": 1.2920309784432153, + "kl": 0.17578125, + "learning_rate": 1.5852150225136515e-06, + "loss": 0.001778631005436182, + "memory(GiB)": 146.12, + "reward": 2.06137752532959, + "reward_std": 0.15827319025993347, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44515714049339294, + "rewards/EvidenceHallucination/std": 0.3905835449695587, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.6917465925216675, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8390125036239624, + "rewards/VideoAccuracy/std": 0.3652336299419403, + "step": 308, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 512.0, + "completions/mean_length": 363.5, + "completions/min_length": 281.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.419921875, + "entropy/min": 0.310546875, + "epoch": 0.309, + "grad_norm": 1.0898502143495572, + "kl": 0.251953125, + "learning_rate": 1.5826388996260502e-06, + "loss": 0.0025412074755877256, + "memory(GiB)": 146.12, + "reward": 1.4703563451766968, + "reward_std": 0.1100665032863617, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3279728889465332, + "rewards/EvidenceHallucination/std": 0.4086807668209076, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.8025076985359192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4047619104385376, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 309, + "train_speed(iter/s)": 0.018445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 543.0, + "completions/mean_length": 352.66668701171875, + "completions/min_length": 201.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.41796875, + "entropy/min": 0.2265625, + "epoch": 0.31, + "grad_norm": 1.3505541919971262, + "kl": 0.2490234375, + "learning_rate": 1.5800569095711981e-06, + "loss": 0.002500642091035843, + "memory(GiB)": 146.12, + "reward": 1.4585460424423218, + "reward_std": 0.22531914710998535, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3418963849544525, + "rewards/EvidenceHallucination/std": 0.4326116442680359, + "rewards/Evidence_Num_Record/mean": 3.0714285373687744, + "rewards/Evidence_Num_Record/std": 0.7454858422279358, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.390166699886322, + "rewards/VideoAccuracy/std": 0.4661102294921875, + "step": 310, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 548.0, + "completions/mean_length": 395.952392578125, + "completions/min_length": 276.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.279296875, + "entropy/min": 0.125, + "epoch": 0.311, + "grad_norm": 1.241576956768926, + "kl": 0.1962890625, + "learning_rate": 1.5774690783497064e-06, + "loss": 0.001979774795472622, + "memory(GiB)": 146.12, + "reward": 2.5162124633789062, + "reward_std": 0.160222128033638, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5431568622589111, + "rewards/EvidenceHallucination/std": 0.4012848436832428, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.506060779094696, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.2075811624526978, + "rewards/VideoAccuracy/std": 0.202640101313591, + "step": 311, + "train_speed(iter/s)": 0.018449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 501.0, + "completions/mean_length": 381.76190185546875, + "completions/min_length": 285.0, + "entropy/max": 0.94921875, + "entropy/mean": 0.4609375, + "entropy/min": 0.251953125, + "epoch": 0.312, + "grad_norm": 1.5698624921416586, + "kl": 0.251953125, + "learning_rate": 1.5748754320210072e-06, + "loss": 0.0025334805250167847, + "memory(GiB)": 146.12, + "reward": 1.825404405593872, + "reward_std": 0.20616315305233002, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5555937886238098, + "rewards/EvidenceHallucination/std": 0.3749423921108246, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.6270148158073425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7142857313156128, + "rewards/VideoAccuracy/std": 0.45722994208335876, + "step": 312, + "train_speed(iter/s)": 0.018452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 601.0, + "completions/mean_length": 391.19049072265625, + "completions/min_length": 276.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.421875, + "entropy/min": 0.28515625, + "epoch": 0.313, + "grad_norm": 1.5479755955950403, + "kl": 0.2353515625, + "learning_rate": 1.5722759967030896e-06, + "loss": 0.0023774546571075916, + "memory(GiB)": 146.12, + "reward": 1.5936198234558105, + "reward_std": 0.254268616437912, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.38316085934638977, + "rewards/EvidenceHallucination/std": 0.4003508985042572, + "rewards/Evidence_Num_Record/mean": 3.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5169876217842102, + "rewards/VideoAccuracy/std": 0.4889325499534607, + "step": 313, + "train_speed(iter/s)": 0.018445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 445.0, + "completions/mean_length": 377.5238037109375, + "completions/min_length": 261.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.3984375, + "entropy/min": 0.1474609375, + "epoch": 0.314, + "grad_norm": 1.5740046501554912, + "kl": 0.251953125, + "learning_rate": 1.5696707985722389e-06, + "loss": 0.0025313228834420443, + "memory(GiB)": 146.12, + "reward": 1.8846992254257202, + "reward_std": 0.1694190353155136, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4761403501033783, + "rewards/EvidenceHallucination/std": 0.4628937542438507, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.5328903794288635, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.6656614542007446, + "rewards/VideoAccuracy/std": 0.5901932120323181, + "step": 314, + "train_speed(iter/s)": 0.01845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 705.0, + "completions/mean_length": 418.6190490722656, + "completions/min_length": 309.0, + "entropy/max": 1.03125, + "entropy/mean": 0.48828125, + "entropy/min": 0.146484375, + "epoch": 0.315, + "grad_norm": 1.318430944373399, + "kl": 0.2294921875, + "learning_rate": 1.5670598638627706e-06, + "loss": 0.002312577562406659, + "memory(GiB)": 146.12, + "reward": 1.614781379699707, + "reward_std": 0.29396653175354004, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2715763449668884, + "rewards/EvidenceHallucination/std": 0.35540372133255005, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9997096061706543, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4937993586063385, + "rewards/VideoAccuracy/std": 0.5385206937789917, + "step": 315, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 558.0, + "completions/mean_length": 381.9047546386719, + "completions/min_length": 213.0, + "entropy/max": 0.5625, + "entropy/mean": 0.42578125, + "entropy/min": 0.27734375, + "epoch": 0.316, + "grad_norm": 1.3986195379080097, + "kl": 0.25390625, + "learning_rate": 1.5644432188667693e-06, + "loss": 0.0025634332560002804, + "memory(GiB)": 146.12, + "reward": 1.4754207134246826, + "reward_std": 0.17962013185024261, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29231202602386475, + "rewards/EvidenceHallucination/std": 0.39029237627983093, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.7265497446060181, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.38838687539100647, + "rewards/VideoAccuracy/std": 0.45132169127464294, + "step": 316, + "train_speed(iter/s)": 0.018446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 608.0, + "completions/mean_length": 402.3809509277344, + "completions/min_length": 319.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.443359375, + "entropy/min": 0.322265625, + "epoch": 0.317, + "grad_norm": 1.6504393361258312, + "kl": 0.259765625, + "learning_rate": 1.56182088993382e-06, + "loss": 0.0026385614182800055, + "memory(GiB)": 146.12, + "reward": 1.6107887029647827, + "reward_std": 0.23265895247459412, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.3710850477218628, + "rewards/EvidenceHallucination/std": 0.4066302478313446, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.7419721484184265, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.49133360385894775, + "rewards/VideoAccuracy/std": 0.48049360513687134, + "step": 317, + "train_speed(iter/s)": 0.018443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 636.0, + "completions/mean_length": 454.71429443359375, + "completions/min_length": 324.0, + "entropy/max": 0.59375, + "entropy/mean": 0.33203125, + "entropy/min": 0.11865234375, + "epoch": 0.318, + "grad_norm": 1.1005909927617632, + "kl": 0.2001953125, + "learning_rate": 1.5591929034707466e-06, + "loss": 0.002016433048993349, + "memory(GiB)": 146.12, + "reward": 1.6688334941864014, + "reward_std": 0.20153991878032684, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.08643382787704468, + "rewards/EvidenceHallucination/std": 0.23969198763370514, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.4703768193721771, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5182134509086609, + "rewards/VideoAccuracy/std": 0.5244945287704468, + "step": 318, + "train_speed(iter/s)": 0.018446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/mean_length": 397.0, + "completions/min_length": 283.0, + "entropy/max": 1.34375, + "entropy/mean": 0.494140625, + "entropy/min": 0.376953125, + "epoch": 0.319, + "grad_norm": 1.4631850543181406, + "kl": 0.2470703125, + "learning_rate": 1.556559285941344e-06, + "loss": 0.002471283543854952, + "memory(GiB)": 146.12, + "reward": 1.7699050903320312, + "reward_std": 0.15336094796657562, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5161923766136169, + "rewards/EvidenceHallucination/std": 0.39299091696739197, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 1.0801234245300293, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 319, + "train_speed(iter/s)": 0.018451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 539.0, + "completions/mean_length": 389.26190185546875, + "completions/min_length": 249.0, + "entropy/max": 0.578125, + "entropy/mean": 0.43359375, + "entropy/min": 0.32421875, + "epoch": 0.32, + "grad_norm": 1.4761331770574069, + "kl": 0.255859375, + "learning_rate": 1.5539200638661104e-06, + "loss": 0.0025728538166731596, + "memory(GiB)": 146.12, + "reward": 1.4469672441482544, + "reward_std": 0.2820085883140564, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2830442786216736, + "rewards/EvidenceHallucination/std": 0.39048337936401367, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.551631510257721, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3570248782634735, + "rewards/VideoAccuracy/std": 0.3700158894062042, + "step": 320, + "train_speed(iter/s)": 0.018457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 532.0, + "completions/mean_length": 413.8095397949219, + "completions/min_length": 327.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.306640625, + "entropy/min": 0.1376953125, + "epoch": 0.321, + "grad_norm": 1.236470466255527, + "kl": 0.21484375, + "learning_rate": 1.5512752638219832e-06, + "loss": 0.0021507274359464645, + "memory(GiB)": 146.12, + "reward": 2.2372350692749023, + "reward_std": 0.14746171236038208, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.38625553250312805, + "rewards/EvidenceHallucination/std": 0.3984059989452362, + "rewards/Evidence_Num_Record/mean": 3.2142858505249023, + "rewards/Evidence_Num_Record/std": 0.7168942093849182, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9766505360603333, + "rewards/VideoAccuracy/std": 0.4403650760650635, + "step": 321, + "train_speed(iter/s)": 0.01846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 638.0, + "completions/mean_length": 439.0, + "completions/min_length": 321.0, + "entropy/max": 1.234375, + "entropy/mean": 0.5234375, + "entropy/min": 0.298828125, + "epoch": 0.322, + "grad_norm": 1.2952347326863511, + "kl": 0.2578125, + "learning_rate": 1.5486249124420701e-06, + "loss": 0.0025827204808592796, + "memory(GiB)": 146.12, + "reward": 1.217543125152588, + "reward_std": 0.3603568971157074, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13533399999141693, + "rewards/EvidenceHallucination/std": 0.29350516200065613, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 1.007521152496338, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.190476194024086, + "rewards/VideoAccuracy/std": 0.39743661880493164, + "step": 322, + "train_speed(iter/s)": 0.018454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 609.0, + "completions/mean_length": 429.9285888671875, + "completions/min_length": 305.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.5, + "entropy/min": 0.298828125, + "epoch": 0.323, + "grad_norm": 1.3586636355187904, + "kl": 0.23046875, + "learning_rate": 1.545969036415379e-06, + "loss": 0.002311261370778084, + "memory(GiB)": 146.12, + "reward": 1.468063235282898, + "reward_std": 0.26232224702835083, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30530834197998047, + "rewards/EvidenceHallucination/std": 0.4035953879356384, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.7344991564750671, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.095238097012043, + "rewards/HonestTime/std": 0.297101765871048, + "rewards/VideoAccuracy/mean": 0.3879539370536804, + "rewards/VideoAccuracy/std": 0.4327782094478607, + "step": 323, + "train_speed(iter/s)": 0.01845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 571.0, + "completions/mean_length": 423.4761962890625, + "completions/min_length": 263.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.41015625, + "entropy/min": 0.12890625, + "epoch": 0.324, + "grad_norm": 1.3775070305346473, + "kl": 0.2490234375, + "learning_rate": 1.543307662486553e-06, + "loss": 0.0024996590800583363, + "memory(GiB)": 146.12, + "reward": 1.8816767930984497, + "reward_std": 0.282106876373291, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41124141216278076, + "rewards/EvidenceHallucination/std": 0.4110432267189026, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.547404408454895, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6660952568054199, + "rewards/VideoAccuracy/std": 0.5266658663749695, + "step": 324, + "train_speed(iter/s)": 0.018451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/mean_length": 466.1190490722656, + "completions/min_length": 324.0, + "entropy/max": 0.984375, + "entropy/mean": 0.48046875, + "entropy/min": 0.1572265625, + "epoch": 0.325, + "grad_norm": 1.3292958129358652, + "kl": 0.2138671875, + "learning_rate": 1.5406408174555977e-06, + "loss": 0.002168423030525446, + "memory(GiB)": 146.12, + "reward": 1.9544423818588257, + "reward_std": 0.2796865999698639, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37937915325164795, + "rewards/EvidenceHallucination/std": 0.33197489380836487, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.845841109752655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8118998408317566, + "rewards/VideoAccuracy/std": 0.4680696427822113, + "step": 325, + "train_speed(iter/s)": 0.018427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 523.0, + "completions/mean_length": 409.0, + "completions/min_length": 312.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.44921875, + "entropy/min": 0.310546875, + "epoch": 0.326, + "grad_norm": 1.3407404990910743, + "kl": 0.263671875, + "learning_rate": 1.5379685281776125e-06, + "loss": 0.00263267382979393, + "memory(GiB)": 146.12, + "reward": 1.5945847034454346, + "reward_std": 0.12171037495136261, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39156463742256165, + "rewards/EvidenceHallucination/std": 0.4287061393260956, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.6228330731391907, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.48293837904930115, + "rewards/VideoAccuracy/std": 0.43847227096557617, + "step": 326, + "train_speed(iter/s)": 0.018432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 564.0, + "completions/mean_length": 421.952392578125, + "completions/min_length": 269.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.439453125, + "entropy/min": 0.30078125, + "epoch": 0.327, + "grad_norm": 1.2325435626664694, + "kl": 0.27734375, + "learning_rate": 1.5352908215625213e-06, + "loss": 0.002784645650535822, + "memory(GiB)": 146.12, + "reward": 1.6382532119750977, + "reward_std": 0.18829719722270966, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3113822638988495, + "rewards/EvidenceHallucination/std": 0.36439794301986694, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.5961549282073975, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5093101263046265, + "rewards/VideoAccuracy/std": 0.5441337823867798, + "step": 327, + "train_speed(iter/s)": 0.018436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 848.0, + "completions/mean_length": 493.0714416503906, + "completions/min_length": 334.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.37109375, + "entropy/min": 0.1357421875, + "epoch": 0.328, + "grad_norm": 1.026689315116029, + "kl": 0.20703125, + "learning_rate": 1.5326077245747997e-06, + "loss": 0.002078625839203596, + "memory(GiB)": 146.12, + "reward": 1.901085615158081, + "reward_std": 0.16592055559158325, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2927778959274292, + "rewards/EvidenceHallucination/std": 0.3463214337825775, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.8821365237236023, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7211014628410339, + "rewards/VideoAccuracy/std": 0.5334154367446899, + "step": 328, + "train_speed(iter/s)": 0.01843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 557.0, + "completions/mean_length": 422.73809814453125, + "completions/min_length": 288.0, + "entropy/max": 1.1640625, + "entropy/mean": 0.498046875, + "entropy/min": 0.345703125, + "epoch": 0.329, + "grad_norm": 1.475495745818706, + "kl": 0.24609375, + "learning_rate": 1.5299192642332049e-06, + "loss": 0.0024703675881028175, + "memory(GiB)": 146.12, + "reward": 1.6978988647460938, + "reward_std": 0.22649529576301575, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.4537801444530487, + "rewards/EvidenceHallucination/std": 0.3907560408115387, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.7948731780052185, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6190476417541504, + "rewards/VideoAccuracy/std": 0.4915074408054352, + "step": 329, + "train_speed(iter/s)": 0.018436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 594.0, + "completions/mean_length": 412.26190185546875, + "completions/min_length": 225.0, + "entropy/max": 0.640625, + "entropy/mean": 0.4609375, + "entropy/min": 0.2890625, + "epoch": 0.33, + "grad_norm": 1.4157097744857878, + "kl": 0.25, + "learning_rate": 1.5272254676105023e-06, + "loss": 0.002520774258300662, + "memory(GiB)": 146.12, + "reward": 1.5191279649734497, + "reward_std": 0.30416056513786316, + "rewards/EvidenceFormat/mean": 0.9523809552192688, + "rewards/EvidenceFormat/std": 0.21554027497768402, + "rewards/EvidenceHallucination/mean": 0.3154602646827698, + "rewards/EvidenceHallucination/std": 0.36483141779899597, + "rewards/Evidence_Num_Record/mean": 3.1190476417541504, + "rewards/Evidence_Num_Record/std": 0.916046142578125, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.4560360312461853, + "rewards/VideoAccuracy/std": 0.4275130331516266, + "step": 330, + "train_speed(iter/s)": 0.018441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 581.0, + "completions/mean_length": 403.6190490722656, + "completions/min_length": 310.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.34375, + "entropy/min": 0.1708984375, + "epoch": 0.331, + "grad_norm": 1.346354815792557, + "kl": 0.234375, + "learning_rate": 1.5245263618331943e-06, + "loss": 0.0023695288691669703, + "memory(GiB)": 146.12, + "reward": 2.5452191829681396, + "reward_std": 0.1293453872203827, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7590943574905396, + "rewards/EvidenceHallucination/std": 0.3576880395412445, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.37719547748565674, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.193400263786316, + "rewards/VideoAccuracy/std": 0.15034785866737366, + "step": 331, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 617.0, + "completions/mean_length": 443.3095397949219, + "completions/min_length": 307.0, + "entropy/max": 1.1796875, + "entropy/mean": 0.60546875, + "entropy/min": 0.34375, + "epoch": 0.332, + "grad_norm": 8.14056828763085, + "kl": 0.279296875, + "learning_rate": 1.521821974081246e-06, + "loss": 0.0028224079869687557, + "memory(GiB)": 146.12, + "reward": 1.7714673280715942, + "reward_std": 0.33833831548690796, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5240030288696289, + "rewards/EvidenceHallucination/std": 0.4096086621284485, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.7501451373100281, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 332, + "train_speed(iter/s)": 0.018456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 626.0, + "completions/mean_length": 430.5476379394531, + "completions/min_length": 322.0, + "entropy/max": 0.59375, + "entropy/mean": 0.458984375, + "entropy/min": 0.33984375, + "epoch": 0.333, + "grad_norm": 1.2195400931695357, + "kl": 0.25390625, + "learning_rate": 1.519112331587812e-06, + "loss": 0.002552357502281666, + "memory(GiB)": 146.12, + "reward": 1.5343940258026123, + "reward_std": 0.27544981241226196, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2723103165626526, + "rewards/EvidenceHallucination/std": 0.3592282831668854, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.5702658891677856, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.44659873843193054, + "rewards/VideoAccuracy/std": 0.47520163655281067, + "step": 333, + "train_speed(iter/s)": 0.018431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 507.0, + "completions/mean_length": 396.76190185546875, + "completions/min_length": 326.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.416015625, + "entropy/min": 0.1455078125, + "epoch": 0.334, + "grad_norm": 1.1881402793874947, + "kl": 0.251953125, + "learning_rate": 1.5163974616389618e-06, + "loss": 0.0025246115401387215, + "memory(GiB)": 146.12, + "reward": 1.7349523305892944, + "reward_std": 0.06460803747177124, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4099797010421753, + "rewards/EvidenceHallucination/std": 0.4215739667415619, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.44500061869621277, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5196231603622437, + "rewards/VideoAccuracy/std": 0.5055806040763855, + "step": 334, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/mean_length": 506.3095397949219, + "completions/min_length": 350.0, + "entropy/max": 1.65625, + "entropy/mean": 0.5234375, + "entropy/min": 0.1611328125, + "epoch": 0.335, + "grad_norm": 1.016418452628076, + "kl": 0.2236328125, + "learning_rate": 1.5136773915734064e-06, + "loss": 0.002261554356664419, + "memory(GiB)": 146.12, + "reward": 1.7054145336151123, + "reward_std": 0.10020820796489716, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32015758752822876, + "rewards/EvidenceHallucination/std": 0.3698805868625641, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.7970489263534546, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5747164487838745, + "rewards/VideoAccuracy/std": 0.4819638133049011, + "step": 335, + "train_speed(iter/s)": 0.018439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 591.0, + "completions/mean_length": 431.9761962890625, + "completions/min_length": 318.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.478515625, + "entropy/min": 0.34765625, + "epoch": 0.336, + "grad_norm": 1.3505539461602332, + "kl": 0.25390625, + "learning_rate": 1.5109521487822206e-06, + "loss": 0.00255573564209044, + "memory(GiB)": 146.12, + "reward": 1.6756770610809326, + "reward_std": 0.17967942357063293, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4279189109802246, + "rewards/EvidenceHallucination/std": 0.42436590790748596, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.5037605166435242, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.5662835240364075, + "rewards/VideoAccuracy/std": 0.449936181306839, + "step": 336, + "train_speed(iter/s)": 0.018441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 551.0, + "completions/mean_length": 421.21429443359375, + "completions/min_length": 294.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.453125, + "entropy/min": 0.359375, + "epoch": 0.337, + "grad_norm": 1.5515733832419425, + "kl": 0.275390625, + "learning_rate": 1.508221760708569e-06, + "loss": 0.0027671372517943382, + "memory(GiB)": 146.12, + "reward": 1.931458592414856, + "reward_std": 0.28220516443252563, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4907691776752472, + "rewards/EvidenceHallucination/std": 0.40087586641311646, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.5823577642440796, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.7333047389984131, + "rewards/VideoAccuracy/std": 0.5154536962509155, + "step": 337, + "train_speed(iter/s)": 0.018447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/mean_length": 481.69049072265625, + "completions/min_length": 356.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.36328125, + "entropy/min": 0.1474609375, + "epoch": 0.338, + "grad_norm": 1.1877824818529439, + "kl": 0.2041015625, + "learning_rate": 1.5054862548474297e-06, + "loss": 0.0020632906816899776, + "memory(GiB)": 146.12, + "reward": 1.9895719289779663, + "reward_std": 0.166742205619812, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4769146740436554, + "rewards/EvidenceHallucination/std": 0.34906309843063354, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.7650920152664185, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7608555555343628, + "rewards/VideoAccuracy/std": 0.3460628092288971, + "step": 338, + "train_speed(iter/s)": 0.018438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 566.0, + "completions/mean_length": 427.5714416503906, + "completions/min_length": 296.0, + "entropy/max": 0.82421875, + "entropy/mean": 0.5, + "entropy/min": 0.302734375, + "epoch": 0.339, + "grad_norm": 1.2548089464891523, + "kl": 0.275390625, + "learning_rate": 1.5027456587453158e-06, + "loss": 0.0027769131120294333, + "memory(GiB)": 146.12, + "reward": 1.5119894742965698, + "reward_std": 0.31302163004875183, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3102223873138428, + "rewards/EvidenceHallucination/std": 0.3759114146232605, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.592735767364502, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.4213736355304718, + "rewards/VideoAccuracy/std": 0.4750100076198578, + "step": 339, + "train_speed(iter/s)": 0.018433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 567.0, + "completions/mean_length": 436.0238037109375, + "completions/min_length": 277.0, + "entropy/max": 0.640625, + "entropy/mean": 0.4375, + "entropy/min": 0.2333984375, + "epoch": 0.34, + "grad_norm": 1.384410392745363, + "kl": 0.259765625, + "learning_rate": 1.5e-06, + "loss": 0.002623537089675665, + "memory(GiB)": 146.12, + "reward": 1.6347346305847168, + "reward_std": 0.3242671489715576, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4092910587787628, + "rewards/EvidenceHallucination/std": 0.47143739461898804, + "rewards/Evidence_Num_Record/mean": 3.095238208770752, + "rewards/Evidence_Num_Record/std": 0.6555401086807251, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.4909716248512268, + "rewards/VideoAccuracy/std": 0.4699263572692871, + "step": 340, + "train_speed(iter/s)": 0.018442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 625.0, + "completions/mean_length": 433.452392578125, + "completions/min_length": 274.0, + "entropy/max": 0.90625, + "entropy/mean": 0.373046875, + "entropy/min": 0.1474609375, + "epoch": 0.341, + "grad_norm": 1.4876821617864908, + "kl": 0.234375, + "learning_rate": 1.4972493062602354e-06, + "loss": 0.0023775191511958838, + "memory(GiB)": 146.12, + "reward": 1.7248107194900513, + "reward_std": 0.27018630504608154, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24774467945098877, + "rewards/EvidenceHallucination/std": 0.3780077397823334, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.8570944666862488, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.4847853481769562, + "rewards/VideoAccuracy/std": 0.438723087310791, + "step": 341, + "train_speed(iter/s)": 0.018437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 591.0, + "completions/mean_length": 453.69049072265625, + "completions/min_length": 278.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.52734375, + "entropy/min": 0.400390625, + "epoch": 0.342, + "grad_norm": 1.1801274023177015, + "kl": 0.255859375, + "learning_rate": 1.4944936052254768e-06, + "loss": 0.002568014431744814, + "memory(GiB)": 146.12, + "reward": 1.3495653867721558, + "reward_std": 0.19368955492973328, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2002081274986267, + "rewards/EvidenceHallucination/std": 0.32005855441093445, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.7669872641563416, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3095238208770752, + "rewards/VideoAccuracy/std": 0.4679011106491089, + "step": 342, + "train_speed(iter/s)": 0.018432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 546.0, + "completions/mean_length": 399.8333435058594, + "completions/min_length": 298.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.447265625, + "entropy/min": 0.2890625, + "epoch": 0.343, + "grad_norm": 1.288550888438917, + "kl": 0.263671875, + "learning_rate": 1.491732924645604e-06, + "loss": 0.002660168334841728, + "memory(GiB)": 146.12, + "reward": 1.5326708555221558, + "reward_std": 0.11299590766429901, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3803858458995819, + "rewards/EvidenceHallucination/std": 0.4688884913921356, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.5372316837310791, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.39945095777511597, + "rewards/VideoAccuracy/std": 0.46400779485702515, + "step": 343, + "train_speed(iter/s)": 0.01843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 592.0, + "completions/mean_length": 442.3809509277344, + "completions/min_length": 267.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.42578125, + "entropy/min": 0.1796875, + "epoch": 0.344, + "grad_norm": 1.2768522713214825, + "kl": 0.287109375, + "learning_rate": 1.4889672923206388e-06, + "loss": 0.0028895996510982513, + "memory(GiB)": 146.12, + "reward": 2.0022213459014893, + "reward_std": 0.03883177042007446, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5681445002555847, + "rewards/EvidenceHallucination/std": 0.41701605916023254, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.6325473189353943, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.721925675868988, + "rewards/VideoAccuracy/std": 0.512051522731781, + "step": 344, + "train_speed(iter/s)": 0.018425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 703.0, + "completions/mean_length": 450.5, + "completions/min_length": 318.0, + "entropy/max": 1.8203125, + "entropy/mean": 0.49609375, + "entropy/min": 0.109375, + "epoch": 0.345, + "grad_norm": 1.1776170050554418, + "kl": 0.2392578125, + "learning_rate": 1.4861967361004686e-06, + "loss": 0.002416463103145361, + "memory(GiB)": 146.12, + "reward": 1.768653154373169, + "reward_std": 0.19547991454601288, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4626978933811188, + "rewards/EvidenceHallucination/std": 0.39814016222953796, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.1168646812438965, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.60944664478302, + "rewards/VideoAccuracy/std": 0.4475680887699127, + "step": 345, + "train_speed(iter/s)": 0.01843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 571.0, + "completions/mean_length": 382.7857360839844, + "completions/min_length": 185.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.4453125, + "entropy/min": 0.287109375, + "epoch": 0.346, + "grad_norm": 1.6279893978244946, + "kl": 0.267578125, + "learning_rate": 1.4834212838845636e-06, + "loss": 0.0027270361315459013, + "memory(GiB)": 146.12, + "reward": 1.7766560316085815, + "reward_std": 0.3405471742153168, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5174556374549866, + "rewards/EvidenceHallucination/std": 0.40616753697395325, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.5868279337882996, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.6255457997322083, + "rewards/VideoAccuracy/std": 0.4321475327014923, + "step": 346, + "train_speed(iter/s)": 0.018437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 516.0, + "completions/mean_length": 401.952392578125, + "completions/min_length": 297.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.443359375, + "entropy/min": 0.283203125, + "epoch": 0.347, + "grad_norm": 1.4556253563734596, + "kl": 0.283203125, + "learning_rate": 1.4806409636216973e-06, + "loss": 0.002829810604453087, + "memory(GiB)": 146.12, + "reward": 1.8094658851623535, + "reward_std": 0.3720260560512543, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4109620749950409, + "rewards/EvidenceHallucination/std": 0.42193925380706787, + "rewards/Evidence_Num_Record/mean": 3.1666667461395264, + "rewards/Evidence_Num_Record/std": 0.5372316837310791, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6272733807563782, + "rewards/VideoAccuracy/std": 0.5630853772163391, + "step": 347, + "train_speed(iter/s)": 0.018433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 479.5714416503906, + "completions/min_length": 255.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.3515625, + "entropy/min": 0.126953125, + "epoch": 0.348, + "grad_norm": 1.2545353061090596, + "kl": 0.1962890625, + "learning_rate": 1.4778558033096631e-06, + "loss": 0.0021742568351328373, + "memory(GiB)": 146.12, + "reward": 1.7992125749588013, + "reward_std": 0.30752599239349365, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.3265472948551178, + "rewards/EvidenceHallucination/std": 0.43388867378234863, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6291413307189941, + "rewards/VideoAccuracy/std": 0.40418609976768494, + "step": 348, + "train_speed(iter/s)": 0.018405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 573.0, + "completions/mean_length": 394.3333435058594, + "completions/min_length": 241.0, + "entropy/max": 0.640625, + "entropy/mean": 0.47265625, + "entropy/min": 0.279296875, + "epoch": 0.349, + "grad_norm": 1.5412409720252533, + "kl": 0.294921875, + "learning_rate": 1.475065830994995e-06, + "loss": 0.0029690172523260117, + "memory(GiB)": 146.12, + "reward": 1.7570728063583374, + "reward_std": 0.12371524423360825, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5494441390037537, + "rewards/EvidenceHallucination/std": 0.4043802320957184, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.5328903794288635, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.6186124682426453, + "rewards/VideoAccuracy/std": 0.4527876675128937, + "step": 349, + "train_speed(iter/s)": 0.018413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 539.0, + "completions/mean_length": 397.8095397949219, + "completions/min_length": 273.0, + "entropy/max": 0.6796875, + "entropy/mean": 0.44140625, + "entropy/min": 0.330078125, + "epoch": 0.35, + "grad_norm": 1.4894202446535798, + "kl": 0.2734375, + "learning_rate": 1.4722710747726827e-06, + "loss": 0.002753614215180278, + "memory(GiB)": 146.12, + "reward": 1.7065989971160889, + "reward_std": 0.36027830839157104, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42857760190963745, + "rewards/EvidenceHallucination/std": 0.4263991713523865, + "rewards/Evidence_Num_Record/mean": 3.2857143878936768, + "rewards/Evidence_Num_Record/std": 0.6730242371559143, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.563740611076355, + "rewards/VideoAccuracy/std": 0.5110309720039368, + "step": 350, + "train_speed(iter/s)": 0.018423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 740.0, + "completions/mean_length": 457.26190185546875, + "completions/min_length": 300.0, + "entropy/max": 0.875, + "entropy/mean": 0.37890625, + "entropy/min": 0.08740234375, + "epoch": 0.351, + "grad_norm": 1.3644218811234954, + "kl": 0.2255859375, + "learning_rate": 1.4694715627858908e-06, + "loss": 0.002268692012876272, + "memory(GiB)": 146.12, + "reward": 2.1510634422302246, + "reward_std": 0.22360464930534363, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4802687466144562, + "rewards/EvidenceHallucination/std": 0.3883917033672333, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.7948732376098633, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.8645331859588623, + "rewards/VideoAccuracy/std": 0.33970150351524353, + "step": 351, + "train_speed(iter/s)": 0.018436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 547.0, + "completions/mean_length": 436.8571472167969, + "completions/min_length": 341.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.49609375, + "entropy/min": 0.353515625, + "epoch": 0.352, + "grad_norm": 1.3208956824327278, + "kl": 0.2734375, + "learning_rate": 1.4666673232256737e-06, + "loss": 0.0027419989928603172, + "memory(GiB)": 146.12, + "reward": 1.7679606676101685, + "reward_std": 0.24578127264976501, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5064693689346313, + "rewards/EvidenceHallucination/std": 0.3789246678352356, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.6228330731391907, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 352, + "train_speed(iter/s)": 0.018404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 553.0, + "completions/mean_length": 401.7857360839844, + "completions/min_length": 260.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.46875, + "entropy/min": 0.353515625, + "epoch": 0.353, + "grad_norm": 1.600266503025139, + "kl": 0.271484375, + "learning_rate": 1.4638583843306926e-06, + "loss": 0.0027310168370604515, + "memory(GiB)": 146.12, + "reward": 1.419572114944458, + "reward_std": 0.20986318588256836, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.18792571127414703, + "rewards/EvidenceHallucination/std": 0.3676707148551941, + "rewards/Evidence_Num_Record/mean": 3.190476179122925, + "rewards/Evidence_Num_Record/std": 0.5054867267608643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.3200821578502655, + "rewards/VideoAccuracy/std": 0.4034067690372467, + "step": 353, + "train_speed(iter/s)": 0.018405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 519.0, + "completions/mean_length": 435.90478515625, + "completions/min_length": 273.0, + "entropy/max": 0.53125, + "entropy/mean": 0.404296875, + "entropy/min": 0.201171875, + "epoch": 0.354, + "grad_norm": 1.24190944601256, + "kl": 0.27734375, + "learning_rate": 1.4610447743869313e-06, + "loss": 0.0027868878096342087, + "memory(GiB)": 146.12, + "reward": 2.0171663761138916, + "reward_std": 0.1643851101398468, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4571863114833832, + "rewards/EvidenceHallucination/std": 0.4068146049976349, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.8006965517997742, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.7685860991477966, + "rewards/VideoAccuracy/std": 0.5048638582229614, + "step": 354, + "train_speed(iter/s)": 0.018414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 712.0, + "completions/mean_length": 472.5238037109375, + "completions/min_length": 285.0, + "entropy/max": 1.5859375, + "entropy/mean": 0.46484375, + "entropy/min": 0.1982421875, + "epoch": 0.355, + "grad_norm": 1.25497763523173, + "kl": 0.2275390625, + "learning_rate": 1.4582265217274103e-06, + "loss": 0.002319670282304287, + "memory(GiB)": 146.12, + "reward": 1.831558108329773, + "reward_std": 0.24632790684700012, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.32749611139297485, + "rewards/EvidenceHallucination/std": 0.32687005400657654, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.9106416702270508, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.7160588502883911, + "rewards/VideoAccuracy/std": 0.4168870151042938, + "step": 355, + "train_speed(iter/s)": 0.01841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 551.0, + "completions/mean_length": 414.4761962890625, + "completions/min_length": 238.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.45703125, + "entropy/min": 0.27734375, + "epoch": 0.356, + "grad_norm": 1.2905006201438967, + "kl": 0.2734375, + "learning_rate": 1.4554036547319032e-06, + "loss": 0.0027824200224131346, + "memory(GiB)": 146.12, + "reward": 1.5569792985916138, + "reward_std": 0.2874768376350403, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3323138952255249, + "rewards/EvidenceHallucination/std": 0.4390609562397003, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.5823577642440796, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.4286118447780609, + "rewards/VideoAccuracy/std": 0.42774662375450134, + "step": 356, + "train_speed(iter/s)": 0.01841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 544.0, + "completions/mean_length": 408.6428527832031, + "completions/min_length": 325.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.45703125, + "entropy/min": 0.349609375, + "epoch": 0.357, + "grad_norm": 1.4640627839195135, + "kl": 0.296875, + "learning_rate": 1.4525762018266483e-06, + "loss": 0.0029886546544730663, + "memory(GiB)": 146.12, + "reward": 1.5981336832046509, + "reward_std": 0.26221147179603577, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2828736901283264, + "rewards/EvidenceHallucination/std": 0.40860292315483093, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.47711870074272156, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.4415588974952698, + "rewards/VideoAccuracy/std": 0.4583386778831482, + "step": 357, + "train_speed(iter/s)": 0.018429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/mean_length": 441.3095397949219, + "completions/min_length": 238.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.359375, + "entropy/min": 0.076171875, + "epoch": 0.358, + "grad_norm": 1.0219281720960793, + "kl": 0.21875, + "learning_rate": 1.4497441914840657e-06, + "loss": 0.00220364797860384, + "memory(GiB)": 146.12, + "reward": 2.045262098312378, + "reward_std": 0.19890999794006348, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44825780391693115, + "rewards/EvidenceHallucination/std": 0.3905707001686096, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.7981540560722351, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8222770690917969, + "rewards/VideoAccuracy/std": 0.4843039810657501, + "step": 358, + "train_speed(iter/s)": 0.01838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 571.0, + "completions/mean_length": 430.5714416503906, + "completions/min_length": 291.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.455078125, + "entropy/min": 0.330078125, + "epoch": 0.359, + "grad_norm": 1.3777633606416004, + "kl": 0.279296875, + "learning_rate": 1.4469076522224682e-06, + "loss": 0.0028131790459156036, + "memory(GiB)": 146.12, + "reward": 1.8447340726852417, + "reward_std": 0.18595537543296814, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.556718647480011, + "rewards/EvidenceHallucination/std": 0.36465418338775635, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.8570944666862488, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.7048189043998718, + "rewards/VideoAccuracy/std": 0.41710811853408813, + "step": 359, + "train_speed(iter/s)": 0.018383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 545.0, + "completions/mean_length": 396.3333435058594, + "completions/min_length": 77.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.44921875, + "entropy/min": 0.310546875, + "epoch": 0.36, + "grad_norm": 1.54607411816935, + "kl": 0.265625, + "learning_rate": 1.4440666126057741e-06, + "loss": 0.0026596831157803535, + "memory(GiB)": 146.12, + "reward": 1.447451114654541, + "reward_std": 0.31365180015563965, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.28275731205940247, + "rewards/EvidenceHallucination/std": 0.4097527265548706, + "rewards/Evidence_Num_Record/mean": 3.095238208770752, + "rewards/Evidence_Num_Record/std": 0.7261500358581543, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.355185329914093, + "rewards/VideoAccuracy/std": 0.3081769049167633, + "step": 360, + "train_speed(iter/s)": 0.018387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 627.0, + "completions/mean_length": 434.19049072265625, + "completions/min_length": 296.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.333984375, + "entropy/min": 0.154296875, + "epoch": 0.361, + "grad_norm": 1.3485145572791253, + "kl": 0.2421875, + "learning_rate": 1.4412211012432211e-06, + "loss": 0.002424489473924041, + "memory(GiB)": 146.12, + "reward": 2.1414265632629395, + "reward_std": 0.09960927814245224, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42047956585884094, + "rewards/EvidenceHallucination/std": 0.4049188792705536, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.49679577350616455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8620923757553101, + "rewards/VideoAccuracy/std": 0.4532416760921478, + "step": 361, + "train_speed(iter/s)": 0.018388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/mean_length": 459.5952453613281, + "completions/min_length": 198.0, + "entropy/max": 0.9140625, + "entropy/mean": 0.482421875, + "entropy/min": 0.22265625, + "epoch": 0.362, + "grad_norm": 1.4207204595844491, + "kl": 0.24609375, + "learning_rate": 1.4383711467890773e-06, + "loss": 0.002514174208045006, + "memory(GiB)": 146.12, + "reward": 1.8118293285369873, + "reward_std": 0.18444190919399261, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48771950602531433, + "rewards/EvidenceHallucination/std": 0.3601215183734894, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 2.155402660369873, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7142857313156128, + "rewards/VideoAccuracy/std": 0.45722997188568115, + "step": 362, + "train_speed(iter/s)": 0.018387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 701.0, + "completions/mean_length": 408.0714416503906, + "completions/min_length": 306.0, + "entropy/max": 0.5625, + "entropy/mean": 0.4453125, + "entropy/min": 0.265625, + "epoch": 0.363, + "grad_norm": 1.4738279134737888, + "kl": 0.279296875, + "learning_rate": 1.4355167779423524e-06, + "loss": 0.0028174584731459618, + "memory(GiB)": 146.12, + "reward": 1.5743744373321533, + "reward_std": 0.2246859073638916, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.26714199781417847, + "rewards/EvidenceHallucination/std": 0.3937433660030365, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.46380311250686646, + "rewards/VideoAccuracy/std": 0.3977683186531067, + "step": 363, + "train_speed(iter/s)": 0.018393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 519.0, + "completions/mean_length": 406.0476379394531, + "completions/min_length": 270.0, + "entropy/max": 0.47265625, + "entropy/mean": 0.396484375, + "entropy/min": 0.2373046875, + "epoch": 0.364, + "grad_norm": 1.3102302939531227, + "kl": 0.275390625, + "learning_rate": 1.4326580234465083e-06, + "loss": 0.002773560583591461, + "memory(GiB)": 146.12, + "reward": 2.0157859325408936, + "reward_std": 0.21028606593608856, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3226657509803772, + "rewards/EvidenceHallucination/std": 0.4021158814430237, + "rewards/Evidence_Num_Record/mean": 3.261904716491699, + "rewards/Evidence_Num_Record/std": 0.49679577350616455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7845862507820129, + "rewards/VideoAccuracy/std": 0.5455461740493774, + "step": 364, + "train_speed(iter/s)": 0.018395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1009.0, + "completions/mean_length": 495.5714416503906, + "completions/min_length": 293.0, + "entropy/max": 1.1484375, + "entropy/mean": 0.47265625, + "entropy/min": 0.1728515625, + "epoch": 0.365, + "grad_norm": 1.2134787504452507, + "kl": 0.2158203125, + "learning_rate": 1.4297949120891716e-06, + "loss": 0.002195878652855754, + "memory(GiB)": 146.12, + "reward": 1.5721949338912964, + "reward_std": 0.3115766644477844, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1513204425573349, + "rewards/EvidenceHallucination/std": 0.2872285544872284, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.890129566192627, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.47526416182518005, + "rewards/VideoAccuracy/std": 0.5100093483924866, + "step": 365, + "train_speed(iter/s)": 0.018384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 594.0, + "completions/mean_length": 428.8333435058594, + "completions/min_length": 260.0, + "entropy/max": 0.72265625, + "entropy/mean": 0.45703125, + "entropy/min": 0.30078125, + "epoch": 0.366, + "grad_norm": 1.4126048684373906, + "kl": 0.27734375, + "learning_rate": 1.4269274727018417e-06, + "loss": 0.002791311126202345, + "memory(GiB)": 146.12, + "reward": 1.5688774585723877, + "reward_std": 0.2685014009475708, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29032036662101746, + "rewards/EvidenceHallucination/std": 0.3536563515663147, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.8981204032897949, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108052015304565, + "rewards/VideoAccuracy/mean": 0.4631943106651306, + "rewards/VideoAccuracy/std": 0.43064507842063904, + "step": 366, + "train_speed(iter/s)": 0.018385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 634.0, + "completions/mean_length": 449.9761962890625, + "completions/min_length": 330.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.412109375, + "entropy/min": 0.314453125, + "epoch": 0.367, + "grad_norm": 1.462879261342147, + "kl": 0.28515625, + "learning_rate": 1.4240557341596018e-06, + "loss": 0.0028515085577964783, + "memory(GiB)": 146.12, + "reward": 1.756030559539795, + "reward_std": 0.29996487498283386, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37230175733566284, + "rewards/EvidenceHallucination/std": 0.39328497648239136, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.6270147562026978, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5863322019577026, + "rewards/VideoAccuracy/std": 0.5068567991256714, + "step": 367, + "train_speed(iter/s)": 0.01838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 792.0, + "completions/mean_length": 501.3095397949219, + "completions/min_length": 292.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.357421875, + "entropy/min": 0.1416015625, + "epoch": 0.368, + "grad_norm": 1.313619431119182, + "kl": 0.21875, + "learning_rate": 1.4211797253808267e-06, + "loss": 0.002218235284090042, + "memory(GiB)": 146.12, + "reward": 1.8827509880065918, + "reward_std": 0.2703996002674103, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2761436700820923, + "rewards/EvidenceHallucination/std": 0.40353676676750183, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 0.8968262672424316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6989507079124451, + "rewards/VideoAccuracy/std": 0.5381085872650146, + "step": 368, + "train_speed(iter/s)": 0.018374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 798.0, + "completions/mean_length": 452.2857360839844, + "completions/min_length": 305.0, + "entropy/max": 1.203125, + "entropy/mean": 0.4609375, + "entropy/min": 0.291015625, + "epoch": 0.369, + "grad_norm": 1.525129645684774, + "kl": 0.26171875, + "learning_rate": 1.4182994753268926e-06, + "loss": 0.0026336682494729757, + "memory(GiB)": 146.12, + "reward": 1.810465931892395, + "reward_std": 0.322182834148407, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5285199284553528, + "rewards/EvidenceHallucination/std": 0.3964601457118988, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.1559572219848633, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.6761904954910278, + "rewards/VideoAccuracy/std": 0.45468270778656006, + "step": 369, + "train_speed(iter/s)": 0.018374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 759.0, + "completions/mean_length": 450.19049072265625, + "completions/min_length": 281.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.40625, + "entropy/min": 0.291015625, + "epoch": 0.37, + "grad_norm": 1.34759416208457, + "kl": 0.26171875, + "learning_rate": 1.4154150130018865e-06, + "loss": 0.002618623897433281, + "memory(GiB)": 146.12, + "reward": 1.4079593420028687, + "reward_std": 0.1855965107679367, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19020766019821167, + "rewards/EvidenceHallucination/std": 0.3501608073711395, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.2211835384368896, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.3080129325389862, + "rewards/VideoAccuracy/std": 0.49778202176094055, + "step": 370, + "train_speed(iter/s)": 0.018392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 995.0, + "completions/mean_length": 527.6666870117188, + "completions/min_length": 323.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.34375, + "entropy/min": 0.12890625, + "epoch": 0.371, + "grad_norm": 1.1374302781371923, + "kl": 0.2216796875, + "learning_rate": 1.4125263674523112e-06, + "loss": 0.002231322694569826, + "memory(GiB)": 146.12, + "reward": 2.184678316116333, + "reward_std": 0.33372053503990173, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4899149537086487, + "rewards/EvidenceHallucination/std": 0.42629364132881165, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.1716750860214233, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8866953253746033, + "rewards/VideoAccuracy/std": 0.4119216501712799, + "step": 371, + "train_speed(iter/s)": 0.018389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 934.0, + "completions/mean_length": 519.1666870117188, + "completions/min_length": 362.0, + "entropy/max": 1.6640625, + "entropy/mean": 0.61328125, + "entropy/min": 0.341796875, + "epoch": 0.372, + "grad_norm": 1.1000883432113129, + "kl": 0.255859375, + "learning_rate": 1.4096335677667951e-06, + "loss": 0.0026076710782945156, + "memory(GiB)": 146.12, + "reward": 1.547003149986267, + "reward_std": 0.36151957511901855, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35406267642974854, + "rewards/EvidenceHallucination/std": 0.3867344856262207, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.3445377349853516, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4761904776096344, + "rewards/VideoAccuracy/std": 0.5054867267608643, + "step": 372, + "train_speed(iter/s)": 0.018387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 804.0, + "completions/mean_length": 466.0476379394531, + "completions/min_length": 332.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.4375, + "entropy/min": 0.302734375, + "epoch": 0.373, + "grad_norm": 1.2749642753472195, + "kl": 0.25390625, + "learning_rate": 1.4067366430758004e-06, + "loss": 0.002570125972852111, + "memory(GiB)": 146.12, + "reward": 1.4805047512054443, + "reward_std": 0.2484472692012787, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3461710214614868, + "rewards/EvidenceHallucination/std": 0.45463377237319946, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 1.2087563276290894, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.35412758588790894, + "rewards/VideoAccuracy/std": 0.34480535984039307, + "step": 373, + "train_speed(iter/s)": 0.018394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 794.0, + "completions/mean_length": 484.3809509277344, + "completions/min_length": 326.0, + "entropy/max": 0.48046875, + "entropy/mean": 0.369140625, + "entropy/min": 0.1787109375, + "epoch": 0.374, + "grad_norm": 1.1626635492438668, + "kl": 0.26171875, + "learning_rate": 1.403835622551325e-06, + "loss": 0.00264472677372396, + "memory(GiB)": 146.12, + "reward": 2.1501784324645996, + "reward_std": 0.19828462600708008, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5695993304252625, + "rewards/EvidenceHallucination/std": 0.3984956741333008, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.0169869661331177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.879115641117096, + "rewards/VideoAccuracy/std": 0.5946195125579834, + "step": 374, + "train_speed(iter/s)": 0.018399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 666.6666870117188, + "completions/min_length": 344.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.37890625, + "entropy/min": 0.11572265625, + "epoch": 0.375, + "grad_norm": 1.171237322592966, + "kl": 0.1982421875, + "learning_rate": 1.4009305354066136e-06, + "loss": 0.0020399591885507107, + "memory(GiB)": 146.12, + "reward": 1.7457466125488281, + "reward_std": 0.4636306166648865, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3744157552719116, + "rewards/EvidenceHallucination/std": 0.35868147015571594, + "rewards/Evidence_Num_Record/mean": 5.761904716491699, + "rewards/Evidence_Num_Record/std": 2.3038623332977295, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6161016821861267, + "rewards/VideoAccuracy/std": 0.46276232600212097, + "step": 375, + "train_speed(iter/s)": 0.018389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 963.0, + "completions/mean_length": 457.3571472167969, + "completions/min_length": 332.0, + "entropy/max": 0.6640625, + "entropy/mean": 0.41796875, + "entropy/min": 0.302734375, + "epoch": 0.376, + "grad_norm": 1.4800288015731955, + "kl": 0.265625, + "learning_rate": 1.3980214108958624e-06, + "loss": 0.0026926174759864807, + "memory(GiB)": 146.12, + "reward": 1.857588529586792, + "reward_std": 0.12305565923452377, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5636403560638428, + "rewards/EvidenceHallucination/std": 0.43467506766319275, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.3216679096221924, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6900984644889832, + "rewards/VideoAccuracy/std": 0.4148041009902954, + "step": 376, + "train_speed(iter/s)": 0.018393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/mean_length": 458.1428527832031, + "completions/min_length": 302.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.38671875, + "entropy/min": 0.26953125, + "epoch": 0.377, + "grad_norm": 1.189318645739725, + "kl": 0.259765625, + "learning_rate": 1.3951082783139218e-06, + "loss": 0.002627232577651739, + "memory(GiB)": 146.12, + "reward": 1.6650316715240479, + "reward_std": 0.19645211100578308, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28568822145462036, + "rewards/EvidenceHallucination/std": 0.39984413981437683, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.214507818222046, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.5197986364364624, + "rewards/VideoAccuracy/std": 0.5648055672645569, + "step": 377, + "train_speed(iter/s)": 0.018361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 889.0, + "completions/mean_length": 494.0476379394531, + "completions/min_length": 300.0, + "entropy/max": 0.67578125, + "entropy/mean": 0.353515625, + "entropy/min": 0.12890625, + "epoch": 0.378, + "grad_norm": 1.054499643623318, + "kl": 0.2021484375, + "learning_rate": 1.3921911669960054e-06, + "loss": 0.002041890984401107, + "memory(GiB)": 146.12, + "reward": 2.0080480575561523, + "reward_std": 0.15570762753486633, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.555682361125946, + "rewards/EvidenceHallucination/std": 0.3886148929595947, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.6792786121368408, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7635781764984131, + "rewards/VideoAccuracy/std": 0.44547799229621887, + "step": 378, + "train_speed(iter/s)": 0.018362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1564.0, + "completions/mean_length": 526.7857055664062, + "completions/min_length": 275.0, + "entropy/max": 1.8359375, + "entropy/mean": 0.498046875, + "entropy/min": 0.265625, + "epoch": 0.379, + "grad_norm": 1.4223091111930677, + "kl": 0.255859375, + "learning_rate": 1.3892701063173915e-06, + "loss": 0.002596626989543438, + "memory(GiB)": 146.12, + "reward": 1.986168622970581, + "reward_std": 0.16616535186767578, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6691222786903381, + "rewards/EvidenceHallucination/std": 0.3635716438293457, + "rewards/Evidence_Num_Record/mean": 5.595238208770752, + "rewards/Evidence_Num_Record/std": 3.7158825397491455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8190109133720398, + "rewards/VideoAccuracy/std": 0.34908995032310486, + "step": 379, + "train_speed(iter/s)": 0.018349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/mean_length": 432.66668701171875, + "completions/min_length": 337.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.396484375, + "entropy/min": 0.2431640625, + "epoch": 0.38, + "grad_norm": 1.4051032258402398, + "kl": 0.279296875, + "learning_rate": 1.3863451256931284e-06, + "loss": 0.002808385295793414, + "memory(GiB)": 146.12, + "reward": 1.8036223649978638, + "reward_std": 0.37080907821655273, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5079375505447388, + "rewards/EvidenceHallucination/std": 0.4352448582649231, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.9093654155731201, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6353679895401001, + "rewards/VideoAccuracy/std": 0.42980101704597473, + "step": 380, + "train_speed(iter/s)": 0.018353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1157.0, + "completions/mean_length": 514.8809814453125, + "completions/min_length": 346.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.306640625, + "entropy/min": 0.12255859375, + "epoch": 0.381, + "grad_norm": 1.130339014282929, + "kl": 0.21875, + "learning_rate": 1.3834162545777392e-06, + "loss": 0.0022574281319975853, + "memory(GiB)": 146.12, + "reward": 2.094353437423706, + "reward_std": 0.08381448686122894, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2632342278957367, + "rewards/EvidenceHallucination/std": 0.3826342523097992, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 2.3587875366210938, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8417066931724548, + "rewards/VideoAccuracy/std": 0.4787370562553406, + "step": 381, + "train_speed(iter/s)": 0.01835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 982.0, + "completions/mean_length": 588.3809814453125, + "completions/min_length": 370.0, + "entropy/max": 0.625, + "entropy/mean": 0.453125, + "entropy/min": 0.2890625, + "epoch": 0.382, + "grad_norm": 1.161291712057949, + "kl": 0.255859375, + "learning_rate": 1.380483522464923e-06, + "loss": 0.0025750526692718267, + "memory(GiB)": 146.12, + "reward": 1.6367465257644653, + "reward_std": 0.2806619107723236, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.445637583732605, + "rewards/EvidenceHallucination/std": 0.419691801071167, + "rewards/Evidence_Num_Record/mean": 5.809524059295654, + "rewards/Evidence_Num_Record/std": 1.8771811723709106, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5476190447807312, + "rewards/VideoAccuracy/std": 0.503760576248169, + "step": 382, + "train_speed(iter/s)": 0.018335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1099.0, + "completions/mean_length": 546.5, + "completions/min_length": 367.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.412109375, + "entropy/min": 0.28515625, + "epoch": 0.383, + "grad_norm": 1.1662744808643895, + "kl": 0.263671875, + "learning_rate": 1.3775469588872598e-06, + "loss": 0.00264447252266109, + "memory(GiB)": 146.12, + "reward": 1.3630290031433105, + "reward_std": 0.3358883559703827, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19906720519065857, + "rewards/EvidenceHallucination/std": 0.36768069863319397, + "rewards/Evidence_Num_Record/mean": 5.11904764175415, + "rewards/Evidence_Num_Record/std": 2.329056978225708, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.27321547269821167, + "rewards/VideoAccuracy/std": 0.3392495810985565, + "step": 383, + "train_speed(iter/s)": 0.018337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 807.0, + "completions/mean_length": 485.1190490722656, + "completions/min_length": 318.0, + "entropy/max": 0.490234375, + "entropy/mean": 0.361328125, + "entropy/min": 0.1796875, + "epoch": 0.384, + "grad_norm": 1.2736120355554539, + "kl": 0.2578125, + "learning_rate": 1.374606593415912e-06, + "loss": 0.0026027606800198555, + "memory(GiB)": 146.12, + "reward": 2.415849447250366, + "reward_std": 0.162336528301239, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5832903385162354, + "rewards/EvidenceHallucination/std": 0.3543720543384552, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.2010449171066284, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 1.1325247287750244, + "rewards/VideoAccuracy/std": 0.29747867584228516, + "step": 384, + "train_speed(iter/s)": 0.018336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 824.0, + "completions/mean_length": 495.90478515625, + "completions/min_length": 325.0, + "entropy/max": 1.1953125, + "entropy/mean": 0.419921875, + "entropy/min": 0.1904296875, + "epoch": 0.385, + "grad_norm": 1.3107778840499427, + "kl": 0.234375, + "learning_rate": 1.3716624556603274e-06, + "loss": 0.0023976964876055717, + "memory(GiB)": 146.12, + "reward": 2.003371238708496, + "reward_std": 0.3091083765029907, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5313931703567505, + "rewards/EvidenceHallucination/std": 0.3634487986564636, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.9906867742538452, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.7970927357673645, + "rewards/VideoAccuracy/std": 0.3665767312049866, + "step": 385, + "train_speed(iter/s)": 0.01834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 779.0, + "completions/mean_length": 496.452392578125, + "completions/min_length": 348.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.41796875, + "entropy/min": 0.2890625, + "epoch": 0.386, + "grad_norm": 1.187078968146532, + "kl": 0.259765625, + "learning_rate": 1.3687145752679408e-06, + "loss": 0.0026177032850682735, + "memory(GiB)": 146.12, + "reward": 1.79196298122406, + "reward_std": 0.2241038978099823, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48707354068756104, + "rewards/EvidenceHallucination/std": 0.41782069206237793, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.148902416229248, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.6326434016227722, + "rewards/VideoAccuracy/std": 0.41035839915275574, + "step": 386, + "train_speed(iter/s)": 0.018337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 808.0, + "completions/mean_length": 460.3333435058594, + "completions/min_length": 297.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.40625, + "entropy/min": 0.23046875, + "epoch": 0.387, + "grad_norm": 1.3726077305695579, + "kl": 0.259765625, + "learning_rate": 1.3657629819238745e-06, + "loss": 0.0026217461563646793, + "memory(GiB)": 146.12, + "reward": 1.9099597930908203, + "reward_std": 0.43789178133010864, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4705699682235718, + "rewards/EvidenceHallucination/std": 0.40831753611564636, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.234426736831665, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.7158457040786743, + "rewards/VideoAccuracy/std": 0.55631422996521, + "step": 387, + "train_speed(iter/s)": 0.018342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 928.0, + "completions/mean_length": 487.3095397949219, + "completions/min_length": 289.0, + "entropy/max": 0.74609375, + "entropy/mean": 0.322265625, + "entropy/min": 0.1796875, + "epoch": 0.388, + "grad_norm": 1.095365935281915, + "kl": 0.22265625, + "learning_rate": 1.3628077053506407e-06, + "loss": 0.0022502231877297163, + "memory(GiB)": 146.12, + "reward": 2.0858771800994873, + "reward_std": 0.3169812858104706, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48037728667259216, + "rewards/EvidenceHallucination/std": 0.42832738161087036, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.938814401626587, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8231350183486938, + "rewards/VideoAccuracy/std": 0.46208032965660095, + "step": 388, + "train_speed(iter/s)": 0.018334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 958.0, + "completions/mean_length": 554.952392578125, + "completions/min_length": 395.0, + "entropy/max": 1.6796875, + "entropy/mean": 0.435546875, + "entropy/min": 0.287109375, + "epoch": 0.389, + "grad_norm": 1.1942709574591455, + "kl": 0.259765625, + "learning_rate": 1.3598487753078426e-06, + "loss": 0.002639633370563388, + "memory(GiB)": 146.12, + "reward": 1.587154746055603, + "reward_std": 0.2010001242160797, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.36558741331100464, + "rewards/EvidenceHallucination/std": 0.40262478590011597, + "rewards/Evidence_Num_Record/mean": 5.476190567016602, + "rewards/Evidence_Num_Record/std": 1.8244690895080566, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.48546579480171204, + "rewards/VideoAccuracy/std": 0.47066184878349304, + "step": 389, + "train_speed(iter/s)": 0.018334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 720.0, + "completions/mean_length": 464.8333435058594, + "completions/min_length": 323.0, + "entropy/max": 0.546875, + "entropy/mean": 0.431640625, + "entropy/min": 0.296875, + "epoch": 0.39, + "grad_norm": 1.4047039030677193, + "kl": 0.296875, + "learning_rate": 1.3568862215918717e-06, + "loss": 0.0029980493709445, + "memory(GiB)": 146.12, + "reward": 1.6332415342330933, + "reward_std": 0.23186847567558289, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3746386468410492, + "rewards/EvidenceHallucination/std": 0.4456513822078705, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.4618651866912842, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4916471838951111, + "rewards/VideoAccuracy/std": 0.45274612307548523, + "step": 390, + "train_speed(iter/s)": 0.018337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 774.0, + "completions/mean_length": 513.9285888671875, + "completions/min_length": 364.0, + "entropy/max": 0.490234375, + "entropy/mean": 0.3203125, + "entropy/min": 0.14453125, + "epoch": 0.391, + "grad_norm": 1.05864387239661, + "kl": 0.2158203125, + "learning_rate": 1.3539200740356119e-06, + "loss": 0.002177801914513111, + "memory(GiB)": 146.12, + "reward": 2.3062353134155273, + "reward_std": 0.14617519080638885, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5758527517318726, + "rewards/EvidenceHallucination/std": 0.3537074327468872, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 1.3157228231430054, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9958263635635376, + "rewards/VideoAccuracy/std": 0.3850289285182953, + "step": 391, + "train_speed(iter/s)": 0.018335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1077.0, + "completions/mean_length": 559.7857055664062, + "completions/min_length": 346.0, + "entropy/max": 0.94921875, + "entropy/mean": 0.486328125, + "entropy/min": 0.291015625, + "epoch": 0.392, + "grad_norm": 1.2034454735474485, + "kl": 0.263671875, + "learning_rate": 1.3509503625081357e-06, + "loss": 0.0026875040493905544, + "memory(GiB)": 146.12, + "reward": 1.6635810136795044, + "reward_std": 0.2549924850463867, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4607619345188141, + "rewards/EvidenceHallucination/std": 0.4197237193584442, + "rewards/Evidence_Num_Record/mean": 5.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.8566515445709229, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5714285969734192, + "rewards/VideoAccuracy/std": 0.5008702874183655, + "step": 392, + "train_speed(iter/s)": 0.018332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 816.0, + "completions/mean_length": 532.6666870117188, + "completions/min_length": 385.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.447265625, + "entropy/min": 0.333984375, + "epoch": 0.393, + "grad_norm": 1.3161624316795113, + "kl": 0.271484375, + "learning_rate": 1.347977116914405e-06, + "loss": 0.0027165599167346954, + "memory(GiB)": 146.12, + "reward": 1.473059058189392, + "reward_std": 0.38854551315307617, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23351234197616577, + "rewards/EvidenceHallucination/std": 0.38533180952072144, + "rewards/Evidence_Num_Record/mean": 4.928571701049805, + "rewards/Evidence_Num_Record/std": 1.8398417234420776, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.3787374198436737, + "rewards/VideoAccuracy/std": 0.34927859902381897, + "step": 393, + "train_speed(iter/s)": 0.018333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 664.0, + "completions/mean_length": 492.69049072265625, + "completions/min_length": 355.0, + "entropy/max": 0.494140625, + "entropy/mean": 0.359375, + "entropy/min": 0.1953125, + "epoch": 0.394, + "grad_norm": 1.1185187742778906, + "kl": 0.26953125, + "learning_rate": 1.3450003671949705e-06, + "loss": 0.0027246675454080105, + "memory(GiB)": 146.12, + "reward": 2.0985453128814697, + "reward_std": 0.11049169301986694, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6033493280410767, + "rewards/EvidenceHallucination/std": 0.43722671270370483, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.0155583620071411, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8112086653709412, + "rewards/VideoAccuracy/std": 0.5243479013442993, + "step": 394, + "train_speed(iter/s)": 0.01834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 664.952392578125, + "completions/min_length": 418.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.34765625, + "entropy/min": 0.0947265625, + "epoch": 0.395, + "grad_norm": 0.7729392430737445, + "kl": 0.2109375, + "learning_rate": 1.3420201433256689e-06, + "loss": 0.0022313406225293875, + "memory(GiB)": 146.12, + "reward": 1.817870020866394, + "reward_std": 0.1385781168937683, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34599390625953674, + "rewards/EvidenceHallucination/std": 0.3843267858028412, + "rewards/Evidence_Num_Record/mean": 5.976190567016602, + "rewards/Evidence_Num_Record/std": 2.1582298278808594, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6939092874526978, + "rewards/VideoAccuracy/std": 0.5304246544837952, + "step": 395, + "train_speed(iter/s)": 0.018326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 715.0, + "completions/mean_length": 486.8333435058594, + "completions/min_length": 342.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.408203125, + "entropy/min": 0.283203125, + "epoch": 0.396, + "grad_norm": 1.1781912522495772, + "kl": 0.265625, + "learning_rate": 1.3390364753173204e-06, + "loss": 0.0026801545172929764, + "memory(GiB)": 146.12, + "reward": 1.6279311180114746, + "reward_std": 0.1566619575023651, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37344738841056824, + "rewards/EvidenceHallucination/std": 0.42502158880233765, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.086556077003479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.49133700132369995, + "rewards/VideoAccuracy/std": 0.4137079119682312, + "step": 396, + "train_speed(iter/s)": 0.018329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 581.0, + "completions/mean_length": 453.5952453613281, + "completions/min_length": 318.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.41796875, + "entropy/min": 0.216796875, + "epoch": 0.397, + "grad_norm": 1.0415509961065834, + "kl": 0.283203125, + "learning_rate": 1.33604939321543e-06, + "loss": 0.003073825966566801, + "memory(GiB)": 146.12, + "reward": 1.510471224784851, + "reward_std": 0.15900374948978424, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29543644189834595, + "rewards/EvidenceHallucination/std": 0.4081650674343109, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 0.5763435363769531, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.35138386487960815, + "rewards/VideoAccuracy/std": 0.4781220853328705, + "step": 397, + "train_speed(iter/s)": 0.018335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 737.0, + "completions/mean_length": 488.5714416503906, + "completions/min_length": 368.0, + "entropy/max": 1.0234375, + "entropy/mean": 0.337890625, + "entropy/min": 0.150390625, + "epoch": 0.398, + "grad_norm": 1.1410838956719997, + "kl": 0.21875, + "learning_rate": 1.3330589270998806e-06, + "loss": 0.002219142857939005, + "memory(GiB)": 146.12, + "reward": 2.0311450958251953, + "reward_std": 0.23200544714927673, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3149089217185974, + "rewards/EvidenceHallucination/std": 0.3664127290248871, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.3278050422668457, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8348298072814941, + "rewards/VideoAccuracy/std": 0.4276603162288666, + "step": 398, + "train_speed(iter/s)": 0.018334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/mean_length": 560.047607421875, + "completions/min_length": 364.0, + "entropy/max": 0.72265625, + "entropy/mean": 0.400390625, + "entropy/min": 0.26953125, + "epoch": 0.399, + "grad_norm": 1.1453943649734857, + "kl": 0.275390625, + "learning_rate": 1.3300651070846331e-06, + "loss": 0.0027932848315685987, + "memory(GiB)": 146.12, + "reward": 1.3441669940948486, + "reward_std": 0.3663594424724579, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.167385995388031, + "rewards/EvidenceHallucination/std": 0.32827678322792053, + "rewards/Evidence_Num_Record/mean": 5.88095235824585, + "rewards/Evidence_Num_Record/std": 2.2760939598083496, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.27735647559165955, + "rewards/VideoAccuracy/std": 0.43789809942245483, + "step": 399, + "train_speed(iter/s)": 0.018336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 606.1904907226562, + "completions/min_length": 365.0, + "entropy/max": 0.6875, + "entropy/mean": 0.404296875, + "entropy/min": 0.06396484375, + "epoch": 0.4, + "grad_norm": 0.9919679351042843, + "kl": 0.2333984375, + "learning_rate": 1.3270679633174217e-06, + "loss": 0.0026555825024843216, + "memory(GiB)": 146.12, + "reward": 1.5256381034851074, + "reward_std": 0.3579689860343933, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22879035770893097, + "rewards/EvidenceHallucination/std": 0.37589502334594727, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 1.689105749130249, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.43702271580696106, + "rewards/VideoAccuracy/std": 0.5390593409538269, + "step": 400, + "train_speed(iter/s)": 0.018312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 638.0, + "completions/mean_length": 449.69049072265625, + "completions/min_length": 320.0, + "entropy/max": 0.4609375, + "entropy/mean": 0.302734375, + "entropy/min": 0.146484375, + "epoch": 0.401, + "grad_norm": 1.139445341067388, + "kl": 0.2490234375, + "learning_rate": 1.3240675259794504e-06, + "loss": 0.0025015901774168015, + "memory(GiB)": 146.12, + "reward": 2.1623573303222656, + "reward_std": 0.13159291446208954, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37520307302474976, + "rewards/EvidenceHallucination/std": 0.44481533765792847, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.7624309062957764, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9285714626312256, + "rewards/HonestTime/std": 0.26066118478775024, + "rewards/VideoAccuracy/mean": 0.9016023278236389, + "rewards/VideoAccuracy/std": 0.3952128291130066, + "step": 401, + "train_speed(iter/s)": 0.018272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 736.357177734375, + "completions/min_length": 312.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.404296875, + "entropy/min": 0.107421875, + "epoch": 0.402, + "grad_norm": 1.2138014727028978, + "kl": 0.2197265625, + "learning_rate": 1.3210638252850906e-06, + "loss": 0.0023418040946125984, + "memory(GiB)": 146.12, + "reward": 1.741194486618042, + "reward_std": 0.3397737145423889, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4916861057281494, + "rewards/EvidenceHallucination/std": 0.3802826404571533, + "rewards/Evidence_Num_Record/mean": 7.428571701049805, + "rewards/Evidence_Num_Record/std": 3.6702020168304443, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 402, + "train_speed(iter/s)": 0.018248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/mean_length": 517.8809814453125, + "completions/min_length": 367.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.4296875, + "entropy/min": 0.33203125, + "epoch": 0.403, + "grad_norm": 1.273083723325503, + "kl": 0.279296875, + "learning_rate": 1.318056891481575e-06, + "loss": 0.0028203255496919155, + "memory(GiB)": 146.12, + "reward": 1.5516564846038818, + "reward_std": 0.24027308821678162, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20131969451904297, + "rewards/EvidenceHallucination/std": 0.36238354444503784, + "rewards/Evidence_Num_Record/mean": 5.1666669845581055, + "rewards/Evidence_Num_Record/std": 1.5913894176483154, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.4542495310306549, + "rewards/VideoAccuracy/std": 0.3473469913005829, + "step": 403, + "train_speed(iter/s)": 0.018256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 553.1190795898438, + "completions/min_length": 319.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.345703125, + "entropy/min": 0.0908203125, + "epoch": 0.404, + "grad_norm": 1.1971248641583463, + "kl": 0.25, + "learning_rate": 1.3150467548486928e-06, + "loss": 0.0026676864363253117, + "memory(GiB)": 146.12, + "reward": 2.1506330966949463, + "reward_std": 0.2771437466144562, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5300754904747009, + "rewards/EvidenceHallucination/std": 0.39288297295570374, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.2308934926986694, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8898561000823975, + "rewards/VideoAccuracy/std": 0.3947162926197052, + "step": 404, + "train_speed(iter/s)": 0.018233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 579.5238037109375, + "completions/min_length": 295.0, + "entropy/max": 1.25, + "entropy/mean": 0.42578125, + "entropy/min": 0.16015625, + "epoch": 0.405, + "grad_norm": 1.055141020743444, + "kl": 0.2431640625, + "learning_rate": 1.3120334456984869e-06, + "loss": 0.0025412007234990597, + "memory(GiB)": 146.12, + "reward": 1.7340662479400635, + "reward_std": 0.20672297477722168, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44430071115493774, + "rewards/EvidenceHallucination/std": 0.4083597660064697, + "rewards/Evidence_Num_Record/mean": 5.6666669845581055, + "rewards/Evidence_Num_Record/std": 2.9686710834503174, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5904439687728882, + "rewards/VideoAccuracy/std": 0.4767262041568756, + "step": 405, + "train_speed(iter/s)": 0.018211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 726.0, + "completions/mean_length": 476.40478515625, + "completions/min_length": 331.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.435546875, + "entropy/min": 0.31640625, + "epoch": 0.406, + "grad_norm": 1.2421606970845038, + "kl": 0.302734375, + "learning_rate": 1.3090169943749473e-06, + "loss": 0.003044125158339739, + "memory(GiB)": 146.12, + "reward": 1.8416240215301514, + "reward_std": 0.2010241448879242, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48798543214797974, + "rewards/EvidenceHallucination/std": 0.421680212020874, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.21091628074646, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.6535505652427673, + "rewards/VideoAccuracy/std": 0.36351004242897034, + "step": 406, + "train_speed(iter/s)": 0.018217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 570.5952758789062, + "completions/min_length": 327.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.3984375, + "entropy/min": 0.052978515625, + "epoch": 0.407, + "grad_norm": 1.032223184887646, + "kl": 0.271484375, + "learning_rate": 1.3059974312537052e-06, + "loss": 0.002970391418784857, + "memory(GiB)": 146.12, + "reward": 1.5923843383789062, + "reward_std": 0.30783599615097046, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3410428762435913, + "rewards/EvidenceHallucination/std": 0.42950204014778137, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 0.9084070324897766, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.45274725556373596, + "rewards/VideoAccuracy/std": 0.5236507058143616, + "step": 407, + "train_speed(iter/s)": 0.018193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 572.5952758789062, + "completions/min_length": 336.0, + "entropy/max": 0.9140625, + "entropy/mean": 0.306640625, + "entropy/min": 0.0849609375, + "epoch": 0.408, + "grad_norm": 1.0113628742819467, + "kl": 0.2041015625, + "learning_rate": 1.3029747867417273e-06, + "loss": 0.002208232879638672, + "memory(GiB)": 146.12, + "reward": 1.915709376335144, + "reward_std": 0.3583010137081146, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31953245401382446, + "rewards/EvidenceHallucination/std": 0.42113471031188965, + "rewards/Evidence_Num_Record/mean": 4.857142925262451, + "rewards/Evidence_Num_Record/std": 3.9482717514038086, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.7470409274101257, + "rewards/VideoAccuracy/std": 0.47045883536338806, + "step": 408, + "train_speed(iter/s)": 0.018173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 566.2857055664062, + "completions/min_length": 308.0, + "entropy/max": 2.515625, + "entropy/mean": 0.50390625, + "entropy/min": 0.0927734375, + "epoch": 0.409, + "grad_norm": 1.458716353547437, + "kl": 0.28515625, + "learning_rate": 1.2999490912770106e-06, + "loss": 0.002989646978676319, + "memory(GiB)": 146.12, + "reward": 1.6135457754135132, + "reward_std": 0.3748294711112976, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35370171070098877, + "rewards/EvidenceHallucination/std": 0.43925637006759644, + "rewards/Evidence_Num_Record/mean": 4.904761791229248, + "rewards/Evidence_Num_Record/std": 1.4784554243087769, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5261387228965759, + "rewards/VideoAccuracy/std": 0.4697481393814087, + "step": 409, + "train_speed(iter/s)": 0.018157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 568.952392578125, + "completions/min_length": 349.0, + "entropy/max": 0.73046875, + "entropy/mean": 0.40625, + "entropy/min": 0.10498046875, + "epoch": 0.41, + "grad_norm": 1.3565337752460667, + "kl": 0.271484375, + "learning_rate": 1.296920375328275e-06, + "loss": 0.002834528684616089, + "memory(GiB)": 146.8, + "reward": 1.4560307264328003, + "reward_std": 0.29307645559310913, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29645535349845886, + "rewards/EvidenceHallucination/std": 0.4277758300304413, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.0548268556594849, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.35388249158859253, + "rewards/VideoAccuracy/std": 0.3094964623451233, + "step": 410, + "train_speed(iter/s)": 0.018145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07142857142857142, + "completions/max_length": 2625.0, + "completions/mean_length": 630.7857055664062, + "completions/min_length": 342.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.2734375, + "entropy/min": 0.0673828125, + "epoch": 0.411, + "grad_norm": 1.0384389324702676, + "kl": 0.1962890625, + "learning_rate": 1.293888669394656e-06, + "loss": 0.0023590796627104282, + "memory(GiB)": 147.17, + "reward": 2.157050848007202, + "reward_std": 0.44932615756988525, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.507347822189331, + "rewards/EvidenceHallucination/std": 0.4063733220100403, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 0.8900012969970703, + "rewards/Format/mean": 0.9285714626312256, + "rewards/Format/std": 0.26066118478775024, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8912954926490784, + "rewards/VideoAccuracy/std": 0.4708878993988037, + "step": 411, + "train_speed(iter/s)": 0.018115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.09523809523809523, + "completions/max_length": 2625.0, + "completions/mean_length": 697.3333740234375, + "completions/min_length": 243.0, + "entropy/max": 2.59375, + "entropy/mean": 0.447265625, + "entropy/min": 0.09765625, + "epoch": 0.412, + "grad_norm": 0.9814914235030084, + "kl": 0.2412109375, + "learning_rate": 1.290854004005399e-06, + "loss": 0.0028356886468827724, + "memory(GiB)": 147.17, + "reward": 1.6398093700408936, + "reward_std": 0.3727138936519623, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46095216274261475, + "rewards/EvidenceHallucination/std": 0.40103238821029663, + "rewards/Evidence_Num_Record/mean": 4.857142925262451, + "rewards/Evidence_Num_Record/std": 1.523337721824646, + "rewards/Format/mean": 0.9047619104385376, + "rewards/Format/std": 0.297101765871048, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679577350616455, + "step": 412, + "train_speed(iter/s)": 0.018094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 747.0, + "completions/mean_length": 494.66668701171875, + "completions/min_length": 297.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.396484375, + "entropy/min": 0.216796875, + "epoch": 0.413, + "grad_norm": 1.0223745329810805, + "kl": 0.283203125, + "learning_rate": 1.287816409719551e-06, + "loss": 0.0028338914271444082, + "memory(GiB)": 147.17, + "reward": 1.2753490209579468, + "reward_std": 0.13075286149978638, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11048247665166855, + "rewards/EvidenceHallucination/std": 0.27473148703575134, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.0852190256118774, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.19610968232154846, + "rewards/VideoAccuracy/std": 0.2764948904514313, + "step": 413, + "train_speed(iter/s)": 0.018114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 503.71429443359375, + "completions/min_length": 304.0, + "entropy/max": 0.703125, + "entropy/mean": 0.37890625, + "entropy/min": 0.08837890625, + "epoch": 0.414, + "grad_norm": 1.09227655023033, + "kl": 0.294921875, + "learning_rate": 1.2847759171256522e-06, + "loss": 0.0031100395135581493, + "memory(GiB)": 147.17, + "reward": 2.3160693645477295, + "reward_std": 0.24112042784690857, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6798633337020874, + "rewards/EvidenceHallucination/std": 0.35106703639030457, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.8379085063934326, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 1.0253347158432007, + "rewards/VideoAccuracy/std": 0.35029590129852295, + "step": 414, + "train_speed(iter/s)": 0.018096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 536.547607421875, + "completions/min_length": 238.0, + "entropy/max": 1.609375, + "entropy/mean": 0.4453125, + "entropy/min": 0.1435546875, + "epoch": 0.415, + "grad_norm": 1.0731366322658153, + "kl": 0.255859375, + "learning_rate": 1.2817325568414297e-06, + "loss": 0.002732915338128805, + "memory(GiB)": 147.17, + "reward": 1.9930468797683716, + "reward_std": 0.25008612871170044, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48213377594947815, + "rewards/EvidenceHallucination/std": 0.43128907680511475, + "rewards/Evidence_Num_Record/mean": 4.88095235824585, + "rewards/Evidence_Num_Record/std": 2.1775169372558594, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.8466201424598694, + "rewards/VideoAccuracy/std": 0.3688356876373291, + "step": 415, + "train_speed(iter/s)": 0.018086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 700.0, + "completions/mean_length": 436.0238037109375, + "completions/min_length": 319.0, + "entropy/max": 0.494140625, + "entropy/mean": 0.384765625, + "entropy/min": 0.236328125, + "epoch": 0.416, + "grad_norm": 1.42174973703141, + "kl": 0.30859375, + "learning_rate": 1.2786863595134878e-06, + "loss": 0.0030953167006373405, + "memory(GiB)": 147.17, + "reward": 1.2248696088790894, + "reward_std": 0.30835697054862976, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.10754480957984924, + "rewards/EvidenceHallucination/std": 0.2973156273365021, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 1.0473682880401611, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2142857164144516, + "rewards/HonestTime/std": 0.4152997136116028, + "rewards/VideoAccuracy/mean": 0.16050346195697784, + "rewards/VideoAccuracy/std": 0.30264562368392944, + "step": 416, + "train_speed(iter/s)": 0.018087 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 620.0, + "completions/mean_length": 423.3571472167969, + "completions/min_length": 314.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.384765625, + "entropy/min": 0.259765625, + "epoch": 0.417, + "grad_norm": 1.346342598424452, + "kl": 0.345703125, + "learning_rate": 1.275637355816999e-06, + "loss": 0.003472857875749469, + "memory(GiB)": 147.17, + "reward": 1.6389472484588623, + "reward_std": 0.2763007581233978, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32168424129486084, + "rewards/EvidenceHallucination/std": 0.4292060136795044, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.686691164970398, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.4793723523616791, + "rewards/VideoAccuracy/std": 0.4684654474258423, + "step": 417, + "train_speed(iter/s)": 0.018089 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 601.452392578125, + "completions/min_length": 278.0, + "entropy/max": 1.5, + "entropy/mean": 0.291015625, + "entropy/min": 0.103515625, + "epoch": 0.418, + "grad_norm": 1.0149165496464008, + "kl": 0.21484375, + "learning_rate": 1.2725855764553978e-06, + "loss": 0.0024170055985450745, + "memory(GiB)": 147.17, + "reward": 2.131437063217163, + "reward_std": 0.29418548941612244, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3117740750312805, + "rewards/EvidenceHallucination/std": 0.4131038188934326, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.9959309101104736, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.9643202424049377, + "rewards/VideoAccuracy/std": 0.3569818437099457, + "step": 418, + "train_speed(iter/s)": 0.018071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 708.0, + "completions/mean_length": 457.3095397949219, + "completions/min_length": 312.0, + "entropy/max": 1.203125, + "entropy/mean": 0.439453125, + "entropy/min": 0.27734375, + "epoch": 0.419, + "grad_norm": 1.4330677765387068, + "kl": 0.328125, + "learning_rate": 1.269531052160068e-06, + "loss": 0.003306722268462181, + "memory(GiB)": 147.17, + "reward": 1.9174654483795166, + "reward_std": 0.1975611299276352, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5992318391799927, + "rewards/EvidenceHallucination/std": 0.35757526755332947, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.1305595636367798, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.764285683631897, + "rewards/VideoAccuracy/std": 0.38623979687690735, + "step": 419, + "train_speed(iter/s)": 0.01806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/mean_length": 423.5952453613281, + "completions/min_length": 262.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.37890625, + "entropy/min": 0.1484375, + "epoch": 0.42, + "grad_norm": 1.3890351791493056, + "kl": 0.326171875, + "learning_rate": 1.2664738136900348e-06, + "loss": 0.003325998317450285, + "memory(GiB)": 147.17, + "reward": 1.5204228162765503, + "reward_std": 0.22484032809734344, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2736830413341522, + "rewards/EvidenceHallucination/std": 0.41822507977485657, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.66999751329422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.39901959896087646, + "rewards/VideoAccuracy/std": 0.4555363059043884, + "step": 420, + "train_speed(iter/s)": 0.018069 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/mean_length": 464.19049072265625, + "completions/min_length": 290.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.28515625, + "entropy/min": 0.1318359375, + "epoch": 0.421, + "grad_norm": 1.059510007020288, + "kl": 0.267578125, + "learning_rate": 1.2634138918316565e-06, + "loss": 0.0027240943163633347, + "memory(GiB)": 147.17, + "reward": 2.171023368835449, + "reward_std": 0.1849653422832489, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46852439641952515, + "rewards/EvidenceHallucination/std": 0.40686750411987305, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.7265497446060181, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8820803761482239, + "rewards/VideoAccuracy/std": 0.4453311562538147, + "step": 421, + "train_speed(iter/s)": 0.018071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/mean_length": 505.0714416503906, + "completions/min_length": 286.0, + "entropy/max": 1.2421875, + "entropy/mean": 0.447265625, + "entropy/min": 0.1787109375, + "epoch": 0.422, + "grad_norm": 1.0522285569899024, + "kl": 0.328125, + "learning_rate": 1.260351317398312e-06, + "loss": 0.0034251343458890915, + "memory(GiB)": 147.17, + "reward": 1.7768745422363281, + "reward_std": 0.014493129216134548, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5510392189025879, + "rewards/EvidenceHallucination/std": 0.4110569953918457, + "rewards/Evidence_Num_Record/mean": 4.952381134033203, + "rewards/Evidence_Num_Record/std": 2.0115809440612793, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 422, + "train_speed(iter/s)": 0.018061 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 489.3095397949219, + "completions/min_length": 296.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.400390625, + "entropy/min": 0.095703125, + "epoch": 0.423, + "grad_norm": 1.1106053463889534, + "kl": 0.296875, + "learning_rate": 1.2572861212300916e-06, + "loss": 0.0031044986099004745, + "memory(GiB)": 147.17, + "reward": 1.2903608083724976, + "reward_std": 0.28117311000823975, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13605429232120514, + "rewards/EvidenceHallucination/std": 0.3380788266658783, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 1.0040568113327026, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.20838797092437744, + "rewards/VideoAccuracy/std": 0.30370450019836426, + "step": 423, + "train_speed(iter/s)": 0.018054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.07142857142857142, + "completions/max_length": 2625.0, + "completions/mean_length": 630.0952758789062, + "completions/min_length": 291.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.353515625, + "entropy/min": 0.08447265625, + "epoch": 0.424, + "grad_norm": 0.9851445551763302, + "kl": 0.2255859375, + "learning_rate": 1.2542183341934871e-06, + "loss": 0.002961507998406887, + "memory(GiB)": 147.17, + "reward": 2.1810081005096436, + "reward_std": 0.5009894371032715, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7007943987846375, + "rewards/EvidenceHallucination/std": 0.3857671320438385, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.7265497446060181, + "rewards/Format/mean": 0.9285714626312256, + "rewards/Format/std": 0.26066118478775024, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9098968505859375, + "rewards/VideoAccuracy/std": 0.4694446921348572, + "step": 424, + "train_speed(iter/s)": 0.018035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1064.0, + "completions/mean_length": 563.0, + "completions/min_length": 316.0, + "entropy/max": 0.7734375, + "entropy/mean": 0.36328125, + "entropy/min": 0.1357421875, + "epoch": 0.425, + "grad_norm": 0.8763542519990872, + "kl": 0.2578125, + "learning_rate": 1.251147987181079e-06, + "loss": 0.002668556524440646, + "memory(GiB)": 147.17, + "reward": 1.6808499097824097, + "reward_std": 0.125533327460289, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11130917072296143, + "rewards/EvidenceHallucination/std": 0.2784552574157715, + "rewards/Evidence_Num_Record/mean": 5.357142925262451, + "rewards/Evidence_Num_Record/std": 2.0579581260681152, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.591921329498291, + "rewards/VideoAccuracy/std": 0.582549512386322, + "step": 425, + "train_speed(iter/s)": 0.01804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 481.8571472167969, + "completions/min_length": 252.0, + "entropy/max": 0.6875, + "entropy/mean": 0.361328125, + "entropy/min": 0.103515625, + "epoch": 0.426, + "grad_norm": 0.9850528322219039, + "kl": 0.310546875, + "learning_rate": 1.248075111111229e-06, + "loss": 0.0032261803280562162, + "memory(GiB)": 147.17, + "reward": 1.2789486646652222, + "reward_std": 0.1393444687128067, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.11669065803289413, + "rewards/EvidenceHallucination/std": 0.25353145599365234, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 1.630323886871338, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.2056104838848114, + "rewards/VideoAccuracy/std": 0.28642597794532776, + "step": 426, + "train_speed(iter/s)": 0.018032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 530.0, + "completions/mean_length": 395.3571472167969, + "completions/min_length": 307.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.3828125, + "entropy/min": 0.2314453125, + "epoch": 0.427, + "grad_norm": 1.00042389463066, + "kl": 0.365234375, + "learning_rate": 1.244999736927764e-06, + "loss": 0.00365253328345716, + "memory(GiB)": 147.17, + "reward": 1.7335829734802246, + "reward_std": 0.2119014859199524, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39727094769477844, + "rewards/EvidenceHallucination/std": 0.4464435875415802, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.7071067690849304, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.554128885269165, + "rewards/VideoAccuracy/std": 0.606694757938385, + "step": 427, + "train_speed(iter/s)": 0.018038 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1055.0, + "completions/mean_length": 579.2857055664062, + "completions/min_length": 309.0, + "entropy/max": 0.734375, + "entropy/mean": 0.31640625, + "entropy/min": 0.1328125, + "epoch": 0.428, + "grad_norm": 1.0621340992825263, + "kl": 0.2294921875, + "learning_rate": 1.2419218955996676e-06, + "loss": 0.002335094381123781, + "memory(GiB)": 147.17, + "reward": 1.875522494316101, + "reward_std": 0.24692384898662567, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34564051032066345, + "rewards/EvidenceHallucination/std": 0.44944071769714355, + "rewards/Evidence_Num_Record/mean": 5.023809432983398, + "rewards/Evidence_Num_Record/std": 1.8933528661727905, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074110031128, + "rewards/VideoAccuracy/mean": 0.6825847625732422, + "rewards/VideoAccuracy/std": 0.5046215057373047, + "step": 428, + "train_speed(iter/s)": 0.018032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1528.0, + "completions/mean_length": 497.3333435058594, + "completions/min_length": 324.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.375, + "entropy/min": 0.1416015625, + "epoch": 0.429, + "grad_norm": 1.2591435950206409, + "kl": 0.326171875, + "learning_rate": 1.2388416181207688e-06, + "loss": 0.0034011879470199347, + "memory(GiB)": 147.17, + "reward": 1.6917601823806763, + "reward_std": 0.2739050090312958, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49674439430236816, + "rewards/EvidenceHallucination/std": 0.4614964425563812, + "rewards/Evidence_Num_Record/mean": 4.5, + "rewards/Evidence_Num_Record/std": 1.941209316253662, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5638399124145508, + "rewards/VideoAccuracy/std": 0.4786205291748047, + "step": 429, + "train_speed(iter/s)": 0.018023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 617.0, + "completions/mean_length": 443.8095397949219, + "completions/min_length": 341.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.419921875, + "entropy/min": 0.2890625, + "epoch": 0.43, + "grad_norm": 1.1075464620990976, + "kl": 0.328125, + "learning_rate": 1.2357589355094273e-06, + "loss": 0.003294752910733223, + "memory(GiB)": 147.17, + "reward": 1.500035285949707, + "reward_std": 0.24608975648880005, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31390058994293213, + "rewards/EvidenceHallucination/std": 0.4289908707141876, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7083376049995422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.3705884516239166, + "rewards/VideoAccuracy/std": 0.47497791051864624, + "step": 430, + "train_speed(iter/s)": 0.018028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 616.0, + "completions/mean_length": 422.5238037109375, + "completions/min_length": 269.0, + "entropy/max": 0.578125, + "entropy/mean": 0.291015625, + "entropy/min": 0.1357421875, + "epoch": 0.431, + "grad_norm": 1.1916147615013701, + "kl": 0.279296875, + "learning_rate": 1.2326738788082223e-06, + "loss": 0.002827655989676714, + "memory(GiB)": 147.17, + "reward": 2.2290050983428955, + "reward_std": 0.156645268201828, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44680774211883545, + "rewards/EvidenceHallucination/std": 0.43866533041000366, + "rewards/Evidence_Num_Record/mean": 3.404762029647827, + "rewards/Evidence_Num_Record/std": 0.6270147562026978, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9396434426307678, + "rewards/VideoAccuracy/std": 0.4963000416755676, + "step": 431, + "train_speed(iter/s)": 0.018031 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 555.2380981445312, + "completions/min_length": 287.0, + "entropy/max": 3.125, + "entropy/mean": 0.56640625, + "entropy/min": 0.10498046875, + "epoch": 0.432, + "grad_norm": 1.065170591762536, + "kl": 0.306640625, + "learning_rate": 1.229586479083641e-06, + "loss": 0.0032462298404425383, + "memory(GiB)": 147.17, + "reward": 1.8200960159301758, + "reward_std": 0.2258869856595993, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5885754823684692, + "rewards/EvidenceHallucination/std": 0.4088142514228821, + "rewards/Evidence_Num_Record/mean": 5.023809432983398, + "rewards/Evidence_Num_Record/std": 1.7177424430847168, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7142857313156128, + "rewards/VideoAccuracy/std": 0.45722997188568115, + "step": 432, + "train_speed(iter/s)": 0.018013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 889.0, + "completions/mean_length": 453.1428527832031, + "completions/min_length": 283.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.39453125, + "entropy/min": 0.1923828125, + "epoch": 0.433, + "grad_norm": 1.3709639169709829, + "kl": 0.341796875, + "learning_rate": 1.2264967674257646e-06, + "loss": 0.0034402552992105484, + "memory(GiB)": 147.17, + "reward": 1.442723035812378, + "reward_std": 0.2591922879219055, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23618589341640472, + "rewards/EvidenceHallucination/std": 0.4067757725715637, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.6973367929458618, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.3478667140007019, + "rewards/VideoAccuracy/std": 0.3904357850551605, + "step": 433, + "train_speed(iter/s)": 0.018016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 640.0, + "completions/mean_length": 469.4761962890625, + "completions/min_length": 336.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.3359375, + "entropy/min": 0.1181640625, + "epoch": 0.434, + "grad_norm": 1.071202993072842, + "kl": 0.3046875, + "learning_rate": 1.2234047749479541e-06, + "loss": 0.0030643518548458815, + "memory(GiB)": 147.17, + "reward": 1.7931246757507324, + "reward_std": 0.24046742916107178, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3707311451435089, + "rewards/EvidenceHallucination/std": 0.4582987427711487, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.7589956521987915, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5523116588592529, + "rewards/VideoAccuracy/std": 0.5277762413024902, + "step": 434, + "train_speed(iter/s)": 0.018034 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 846.0, + "completions/mean_length": 478.23809814453125, + "completions/min_length": 323.0, + "entropy/max": 0.703125, + "entropy/mean": 0.35546875, + "entropy/min": 0.1494140625, + "epoch": 0.435, + "grad_norm": 0.9180245436458784, + "kl": 0.27734375, + "learning_rate": 1.2203105327865407e-06, + "loss": 0.002856798470020294, + "memory(GiB)": 147.17, + "reward": 1.877048373222351, + "reward_std": 0.038632702082395554, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5482261180877686, + "rewards/EvidenceHallucination/std": 0.4071207344532013, + "rewards/Evidence_Num_Record/mean": 4.761904716491699, + "rewards/Evidence_Num_Record/std": 1.7080801725387573, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7007363438606262, + "rewards/VideoAccuracy/std": 0.5069580674171448, + "step": 435, + "train_speed(iter/s)": 0.018036 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1574.0, + "completions/mean_length": 499.16668701171875, + "completions/min_length": 300.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.353515625, + "entropy/min": 0.1484375, + "epoch": 0.436, + "grad_norm": 1.3879530713684283, + "kl": 0.314453125, + "learning_rate": 1.2172140721005079e-06, + "loss": 0.0032035568729043007, + "memory(GiB)": 147.17, + "reward": 1.9349544048309326, + "reward_std": 0.13429507613182068, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6770479083061218, + "rewards/EvidenceHallucination/std": 0.394752562046051, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.3104157447814941, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2142857164144516, + "rewards/HonestTime/std": 0.4152997136116028, + "rewards/VideoAccuracy/mean": 0.7566876411437988, + "rewards/VideoAccuracy/std": 0.3377890884876251, + "step": 436, + "train_speed(iter/s)": 0.018023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 660.0, + "completions/mean_length": 420.69049072265625, + "completions/min_length": 230.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.375, + "entropy/min": 0.224609375, + "epoch": 0.437, + "grad_norm": 1.0397588887069589, + "kl": 0.341796875, + "learning_rate": 1.2141154240711804e-06, + "loss": 0.003630727296695113, + "memory(GiB)": 147.17, + "reward": 1.5739099979400635, + "reward_std": 0.12505431473255157, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3220664858818054, + "rewards/EvidenceHallucination/std": 0.44022148847579956, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.8890219330787659, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.4094966650009155, + "rewards/VideoAccuracy/std": 0.5382682681083679, + "step": 437, + "train_speed(iter/s)": 0.018042 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/mean_length": 506.9761962890625, + "completions/min_length": 293.0, + "entropy/max": 1.828125, + "entropy/mean": 0.361328125, + "entropy/min": 0.1005859375, + "epoch": 0.438, + "grad_norm": 1.1826071787259753, + "kl": 0.24609375, + "learning_rate": 1.2110146199019098e-06, + "loss": 0.002538088709115982, + "memory(GiB)": 147.17, + "reward": 1.7834502458572388, + "reward_std": 0.3092948794364929, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3220008611679077, + "rewards/EvidenceHallucination/std": 0.38896769285202026, + "rewards/Evidence_Num_Record/mean": 4.547619342803955, + "rewards/Evidence_Num_Record/std": 2.724794626235962, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.585716724395752, + "rewards/VideoAccuracy/std": 0.4448087513446808, + "step": 438, + "train_speed(iter/s)": 0.01803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 762.0, + "completions/mean_length": 496.0238037109375, + "completions/min_length": 328.0, + "entropy/max": 1.078125, + "entropy/mean": 0.4453125, + "entropy/min": 0.23828125, + "epoch": 0.439, + "grad_norm": 1.2076013926959337, + "kl": 0.298828125, + "learning_rate": 1.207911690817759e-06, + "loss": 0.0030236421152949333, + "memory(GiB)": 147.17, + "reward": 1.730692744255066, + "reward_std": 0.30598652362823486, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49944278597831726, + "rewards/EvidenceHallucination/std": 0.43106022477149963, + "rewards/Evidence_Num_Record/mean": 4.952381134033203, + "rewards/Evidence_Num_Record/std": 1.360637903213501, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.60699462890625, + "rewards/VideoAccuracy/std": 0.45530325174331665, + "step": 439, + "train_speed(iter/s)": 0.018035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 587.0, + "completions/mean_length": 409.3571472167969, + "completions/min_length": 275.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.3984375, + "entropy/min": 0.228515625, + "epoch": 0.44, + "grad_norm": 1.3589092248962515, + "kl": 0.3515625, + "learning_rate": 1.2048066680651908e-06, + "loss": 0.003545670537278056, + "memory(GiB)": 147.17, + "reward": 1.5632761716842651, + "reward_std": 0.2434949278831482, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2878570556640625, + "rewards/EvidenceHallucination/std": 0.43739238381385803, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.9891983866691589, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.4485619068145752, + "rewards/VideoAccuracy/std": 0.5861182808876038, + "step": 440, + "train_speed(iter/s)": 0.018045 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 598.0, + "completions/mean_length": 432.8095397949219, + "completions/min_length": 351.0, + "entropy/max": 0.546875, + "entropy/mean": 0.291015625, + "entropy/min": 0.142578125, + "epoch": 0.441, + "grad_norm": 0.9782030369969055, + "kl": 0.279296875, + "learning_rate": 1.2016995829117486e-06, + "loss": 0.002998619107529521, + "memory(GiB)": 147.17, + "reward": 2.205361843109131, + "reward_std": 0.05849936604499817, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6928436160087585, + "rewards/EvidenceHallucination/std": 0.3390180766582489, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8667927980422974, + "rewards/VideoAccuracy/std": 0.43811073899269104, + "step": 441, + "train_speed(iter/s)": 0.01805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/mean_length": 509.5, + "completions/min_length": 319.0, + "entropy/max": 1.9140625, + "entropy/mean": 0.4453125, + "entropy/min": 0.193359375, + "epoch": 0.442, + "grad_norm": 1.1929971126976804, + "kl": 0.296875, + "learning_rate": 1.1985904666457453e-06, + "loss": 0.0030174236744642258, + "memory(GiB)": 147.17, + "reward": 1.6967631578445435, + "reward_std": 0.2626940608024597, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5076243877410889, + "rewards/EvidenceHallucination/std": 0.4318709969520569, + "rewards/Evidence_Num_Record/mean": 5.428571701049805, + "rewards/Evidence_Num_Record/std": 2.670003890991211, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679577350616455, + "step": 442, + "train_speed(iter/s)": 0.018051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 724.0, + "completions/mean_length": 436.90478515625, + "completions/min_length": 263.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.419921875, + "entropy/min": 0.29296875, + "epoch": 0.443, + "grad_norm": 1.5673902316475388, + "kl": 0.322265625, + "learning_rate": 1.1954793505759482e-06, + "loss": 0.003226308850571513, + "memory(GiB)": 147.17, + "reward": 1.6576652526855469, + "reward_std": 0.3279918432235718, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.327426940202713, + "rewards/EvidenceHallucination/std": 0.4299605190753937, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.3716899156570435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.5350369811058044, + "rewards/VideoAccuracy/std": 0.4003976285457611, + "step": 443, + "train_speed(iter/s)": 0.018056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 800.0, + "completions/mean_length": 432.452392578125, + "completions/min_length": 287.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.322265625, + "entropy/min": 0.1298828125, + "epoch": 0.444, + "grad_norm": 1.290247829605721, + "kl": 0.330078125, + "learning_rate": 1.192366266031261e-06, + "loss": 0.003352985717356205, + "memory(GiB)": 147.17, + "reward": 2.2507922649383545, + "reward_std": 0.25693458318710327, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6864717602729797, + "rewards/EvidenceHallucination/std": 0.3530677258968353, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 1.0714867115020752, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.946831226348877, + "rewards/VideoAccuracy/std": 0.47743329405784607, + "step": 444, + "train_speed(iter/s)": 0.018058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1707.0, + "completions/mean_length": 541.0238037109375, + "completions/min_length": 275.0, + "entropy/max": 0.703125, + "entropy/mean": 0.33984375, + "entropy/min": 0.1298828125, + "epoch": 0.445, + "grad_norm": 1.3235877313949866, + "kl": 0.24609375, + "learning_rate": 1.1892512443604101e-06, + "loss": 0.0026101216208189726, + "memory(GiB)": 147.17, + "reward": 1.735228419303894, + "reward_std": 0.3607367277145386, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42023664712905884, + "rewards/EvidenceHallucination/std": 0.3911043405532837, + "rewards/Evidence_Num_Record/mean": 5.476190567016602, + "rewards/Evidence_Num_Record/std": 3.535451650619507, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5845143795013428, + "rewards/VideoAccuracy/std": 0.5250858068466187, + "step": 445, + "train_speed(iter/s)": 0.018051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 800.0, + "completions/mean_length": 471.6190490722656, + "completions/min_length": 334.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.37109375, + "entropy/min": 0.23828125, + "epoch": 0.446, + "grad_norm": 1.009163131938503, + "kl": 0.328125, + "learning_rate": 1.18613431693163e-06, + "loss": 0.0032964288257062435, + "memory(GiB)": 147.17, + "reward": 1.609557032585144, + "reward_std": 0.15845243632793427, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3692663609981537, + "rewards/EvidenceHallucination/std": 0.4354756474494934, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.4152398109436035, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4690369665622711, + "rewards/VideoAccuracy/std": 0.4111632704734802, + "step": 446, + "train_speed(iter/s)": 0.018053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 794.0, + "completions/mean_length": 459.23809814453125, + "completions/min_length": 288.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.3828125, + "entropy/min": 0.2275390625, + "epoch": 0.447, + "grad_norm": 1.1351614156474819, + "kl": 0.333984375, + "learning_rate": 1.1830155151323444e-06, + "loss": 0.0033620535396039486, + "memory(GiB)": 147.17, + "reward": 1.8145294189453125, + "reward_std": 0.2776869535446167, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4702847898006439, + "rewards/EvidenceHallucination/std": 0.42905890941619873, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.3710546493530273, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6204724311828613, + "rewards/VideoAccuracy/std": 0.5245855450630188, + "step": 447, + "train_speed(iter/s)": 0.018059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 809.0, + "completions/mean_length": 505.0, + "completions/min_length": 309.0, + "entropy/max": 0.67578125, + "entropy/mean": 0.310546875, + "entropy/min": 0.158203125, + "epoch": 0.448, + "grad_norm": 1.031082296659848, + "kl": 0.26953125, + "learning_rate": 1.1798948703688538e-06, + "loss": 0.0027520672883838415, + "memory(GiB)": 147.17, + "reward": 1.888918399810791, + "reward_std": 0.21466487646102905, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4228678345680237, + "rewards/EvidenceHallucination/std": 0.4388137459754944, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.2505515813827515, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6710115075111389, + "rewards/VideoAccuracy/std": 0.5001021027565002, + "step": 448, + "train_speed(iter/s)": 0.01806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 990.0, + "completions/mean_length": 478.6428527832031, + "completions/min_length": 303.0, + "entropy/max": 0.91015625, + "entropy/mean": 0.41796875, + "entropy/min": 0.275390625, + "epoch": 0.449, + "grad_norm": 1.2527013531866111, + "kl": 0.314453125, + "learning_rate": 1.1767724140660156e-06, + "loss": 0.003172614611685276, + "memory(GiB)": 147.17, + "reward": 1.6928980350494385, + "reward_std": 0.21076981723308563, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5474359393119812, + "rewards/EvidenceHallucination/std": 0.46206578612327576, + "rewards/Evidence_Num_Record/mean": 4.642857074737549, + "rewards/Evidence_Num_Record/std": 1.9978210926055908, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5500772595405579, + "rewards/VideoAccuracy/std": 0.46221691370010376, + "step": 449, + "train_speed(iter/s)": 0.018056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 646.0, + "completions/mean_length": 418.5, + "completions/min_length": 287.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.376953125, + "entropy/min": 0.2177734375, + "epoch": 0.45, + "grad_norm": 1.3902251118272175, + "kl": 0.341796875, + "learning_rate": 1.1736481776669305e-06, + "loss": 0.0034268698655068874, + "memory(GiB)": 147.17, + "reward": 1.7026755809783936, + "reward_std": 0.2461749017238617, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4613755941390991, + "rewards/EvidenceHallucination/std": 0.45595988631248474, + "rewards/Evidence_Num_Record/mean": 3.5, + "rewards/Evidence_Num_Record/std": 0.9173131585121155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5484958291053772, + "rewards/VideoAccuracy/std": 0.4273762106895447, + "step": 450, + "train_speed(iter/s)": 0.018064 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 826.0, + "completions/mean_length": 518.6666870117188, + "completions/min_length": 282.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.29296875, + "entropy/min": 0.1591796875, + "epoch": 0.451, + "grad_norm": 1.1511943444979413, + "kl": 0.279296875, + "learning_rate": 1.1705221926326238e-06, + "loss": 0.0028034679125994444, + "memory(GiB)": 147.17, + "reward": 2.190749168395996, + "reward_std": 0.07624303549528122, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2890765070915222, + "rewards/EvidenceHallucination/std": 0.41826358437538147, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.9830148816108704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9329338073730469, + "rewards/VideoAccuracy/std": 0.334219753742218, + "step": 451, + "train_speed(iter/s)": 0.018053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 887.0, + "completions/mean_length": 494.4761962890625, + "completions/min_length": 350.0, + "entropy/max": 1.375, + "entropy/mean": 0.453125, + "entropy/min": 0.1962890625, + "epoch": 0.452, + "grad_norm": 1.2027789028409457, + "kl": 0.3359375, + "learning_rate": 1.1673944904417308e-06, + "loss": 0.0033881841227412224, + "memory(GiB)": 147.17, + "reward": 1.7803906202316284, + "reward_std": 0.3277139365673065, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5686198472976685, + "rewards/EvidenceHallucination/std": 0.4226139485836029, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.7897632122039795, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 452, + "train_speed(iter/s)": 0.018028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 871.0, + "completions/mean_length": 516.5714111328125, + "completions/min_length": 319.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.416015625, + "entropy/min": 0.2412109375, + "epoch": 0.453, + "grad_norm": 1.214475764928054, + "kl": 0.2890625, + "learning_rate": 1.164265102590177e-06, + "loss": 0.0029415320605039597, + "memory(GiB)": 147.17, + "reward": 1.564728856086731, + "reward_std": 0.20751957595348358, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28774741291999817, + "rewards/EvidenceHallucination/std": 0.40104275941848755, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 1.8804266452789307, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.4595602750778198, + "rewards/VideoAccuracy/std": 0.4347084164619446, + "step": 453, + "train_speed(iter/s)": 0.018007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 758.0, + "completions/mean_length": 508.8571472167969, + "completions/min_length": 326.0, + "entropy/max": 0.4765625, + "entropy/mean": 0.333984375, + "entropy/min": 0.142578125, + "epoch": 0.454, + "grad_norm": 1.1316807884478712, + "kl": 0.310546875, + "learning_rate": 1.1611340605908642e-06, + "loss": 0.0031346462201327085, + "memory(GiB)": 147.17, + "reward": 1.9818377494812012, + "reward_std": 0.237799733877182, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3703981935977936, + "rewards/EvidenceHallucination/std": 0.4406714141368866, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.2623374462127686, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7410913109779358, + "rewards/VideoAccuracy/std": 0.5927178859710693, + "step": 454, + "train_speed(iter/s)": 0.018021 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/mean_length": 591.047607421875, + "completions/min_length": 355.0, + "entropy/max": 1.484375, + "entropy/mean": 0.357421875, + "entropy/min": 0.17578125, + "epoch": 0.455, + "grad_norm": 1.1066951817944102, + "kl": 0.26953125, + "learning_rate": 1.15800139597335e-06, + "loss": 0.0028007570654153824, + "memory(GiB)": 147.17, + "reward": 1.9226216077804565, + "reward_std": 0.21123027801513672, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44682320952415466, + "rewards/EvidenceHallucination/std": 0.4188815951347351, + "rewards/Evidence_Num_Record/mean": 5.285714149475098, + "rewards/Evidence_Num_Record/std": 2.178450107574463, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.766590416431427, + "rewards/VideoAccuracy/std": 0.49839770793914795, + "step": 455, + "train_speed(iter/s)": 0.01802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 726.0, + "completions/mean_length": 485.9761962890625, + "completions/min_length": 334.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.4140625, + "entropy/min": 0.267578125, + "epoch": 0.456, + "grad_norm": 1.1556889569386688, + "kl": 0.3125, + "learning_rate": 1.1548671402835324e-06, + "loss": 0.0031380036380141973, + "memory(GiB)": 147.17, + "reward": 1.6291717290878296, + "reward_std": 0.16052106022834778, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29264798760414124, + "rewards/EvidenceHallucination/std": 0.41749146580696106, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.4183138608932495, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008703470230103, + "rewards/VideoAccuracy/mean": 0.484927773475647, + "rewards/VideoAccuracy/std": 0.39783400297164917, + "step": 456, + "train_speed(iter/s)": 0.018014 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/mean_length": 453.26190185546875, + "completions/min_length": 331.0, + "entropy/max": 0.53125, + "entropy/mean": 0.388671875, + "entropy/min": 0.2001953125, + "epoch": 0.457, + "grad_norm": 1.3107310194352497, + "kl": 0.341796875, + "learning_rate": 1.1517313250833317e-06, + "loss": 0.0034421063028275967, + "memory(GiB)": 147.17, + "reward": 2.007448196411133, + "reward_std": 0.0665355920791626, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.605268120765686, + "rewards/EvidenceHallucination/std": 0.441053181886673, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.1168646812438965, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.786394476890564, + "rewards/VideoAccuracy/std": 0.4613673686981201, + "step": 457, + "train_speed(iter/s)": 0.018015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1236.0, + "completions/mean_length": 583.40478515625, + "completions/min_length": 285.0, + "entropy/max": 0.6875, + "entropy/mean": 0.25390625, + "entropy/min": 0.0849609375, + "epoch": 0.458, + "grad_norm": 0.9236477934543165, + "kl": 0.2373046875, + "learning_rate": 1.1485939819503716e-06, + "loss": 0.002488694153726101, + "memory(GiB)": 147.17, + "reward": 2.173037052154541, + "reward_std": 0.20351508259773254, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5917856693267822, + "rewards/EvidenceHallucination/std": 0.3949899673461914, + "rewards/Evidence_Num_Record/mean": 5.357142925262451, + "rewards/Evidence_Num_Record/std": 2.895087718963623, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9213466048240662, + "rewards/VideoAccuracy/std": 0.3844154477119446, + "step": 458, + "train_speed(iter/s)": 0.018013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1602.0, + "completions/mean_length": 608.6666870117188, + "completions/min_length": 386.0, + "entropy/max": 0.90625, + "entropy/mean": 0.3828125, + "entropy/min": 0.22265625, + "epoch": 0.459, + "grad_norm": 1.0766498829438478, + "kl": 0.279296875, + "learning_rate": 1.1454551424776635e-06, + "loss": 0.0028959258925169706, + "memory(GiB)": 147.17, + "reward": 1.6666834354400635, + "reward_std": 0.13521941006183624, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4474688172340393, + "rewards/EvidenceHallucination/std": 0.4277777075767517, + "rewards/Evidence_Num_Record/mean": 6.5714287757873535, + "rewards/Evidence_Num_Record/std": 3.82637095451355, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5486180782318115, + "rewards/VideoAccuracy/std": 0.46456602215766907, + "step": 459, + "train_speed(iter/s)": 0.018008 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 706.0, + "completions/mean_length": 464.4761962890625, + "completions/min_length": 278.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.3828125, + "entropy/min": 0.228515625, + "epoch": 0.46, + "grad_norm": 1.1448579776748715, + "kl": 0.326171875, + "learning_rate": 1.1423148382732853e-06, + "loss": 0.0032780009787529707, + "memory(GiB)": 147.17, + "reward": 1.3398847579956055, + "reward_std": 0.21157526969909668, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1676062047481537, + "rewards/EvidenceHallucination/std": 0.3510550558567047, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 1.33999502658844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500061869621277, + "rewards/VideoAccuracy/mean": 0.2539824843406677, + "rewards/VideoAccuracy/std": 0.35338884592056274, + "step": 460, + "train_speed(iter/s)": 0.018025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 585.0, + "completions/mean_length": 452.0, + "completions/min_length": 341.0, + "entropy/max": 0.45703125, + "entropy/mean": 0.328125, + "entropy/min": 0.130859375, + "epoch": 0.461, + "grad_norm": 1.1064787379603562, + "kl": 0.294921875, + "learning_rate": 1.1391731009600653e-06, + "loss": 0.0029807686805725098, + "memory(GiB)": 147.17, + "reward": 2.0820388793945312, + "reward_std": 0.13201361894607544, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4889209568500519, + "rewards/EvidenceHallucination/std": 0.43544960021972656, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.8164965510368347, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.7937784790992737, + "rewards/VideoAccuracy/std": 0.4980928599834442, + "step": 461, + "train_speed(iter/s)": 0.018026 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 737.2619018554688, + "completions/min_length": 353.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.298828125, + "entropy/min": 0.0703125, + "epoch": 0.462, + "grad_norm": 0.9776750470192334, + "kl": 0.2373046875, + "learning_rate": 1.1360299621752643e-06, + "loss": 0.0026709954254329205, + "memory(GiB)": 147.17, + "reward": 1.6693943738937378, + "reward_std": 0.11008346080780029, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4303053915500641, + "rewards/EvidenceHallucination/std": 0.38703253865242004, + "rewards/Evidence_Num_Record/mean": 6.523809432983398, + "rewards/Evidence_Num_Record/std": 4.379765510559082, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5952380895614624, + "rewards/VideoAccuracy/std": 0.49679577350616455, + "step": 462, + "train_speed(iter/s)": 0.01801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 799.0, + "completions/mean_length": 509.8095397949219, + "completions/min_length": 346.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.4140625, + "entropy/min": 0.251953125, + "epoch": 0.463, + "grad_norm": 1.2944413633743106, + "kl": 0.302734375, + "learning_rate": 1.1328854535702542e-06, + "loss": 0.0030356289353221655, + "memory(GiB)": 147.17, + "reward": 1.8189336061477661, + "reward_std": 0.32026880979537964, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5183867812156677, + "rewards/EvidenceHallucination/std": 0.4302096664905548, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.4305444955825806, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6485896110534668, + "rewards/VideoAccuracy/std": 0.3347703218460083, + "step": 463, + "train_speed(iter/s)": 0.018013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 681.0, + "completions/mean_length": 478.6428527832031, + "completions/min_length": 312.0, + "entropy/max": 0.53125, + "entropy/mean": 0.33203125, + "entropy/min": 0.2080078125, + "epoch": 0.464, + "grad_norm": 1.2620868621492844, + "kl": 0.337890625, + "learning_rate": 1.1297396068102017e-06, + "loss": 0.0034277853555977345, + "memory(GiB)": 147.17, + "reward": 2.0954244136810303, + "reward_std": 0.13253745436668396, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.47420939803123474, + "rewards/EvidenceHallucination/std": 0.4610733687877655, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 1.2970528602600098, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8339158296585083, + "rewards/VideoAccuracy/std": 0.5612301826477051, + "step": 464, + "train_speed(iter/s)": 0.01802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1319.0, + "completions/mean_length": 600.357177734375, + "completions/min_length": 287.0, + "entropy/max": 1.546875, + "entropy/mean": 0.392578125, + "entropy/min": 0.07373046875, + "epoch": 0.465, + "grad_norm": 0.9052743145971144, + "kl": 0.2431640625, + "learning_rate": 1.1265924535737492e-06, + "loss": 0.0025208794977515936, + "memory(GiB)": 147.17, + "reward": 1.5555534362792969, + "reward_std": 0.19387571513652802, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16526667773723602, + "rewards/EvidenceHallucination/std": 0.31730732321739197, + "rewards/Evidence_Num_Record/mean": 5.5714287757873535, + "rewards/Evidence_Num_Record/std": 2.188025712966919, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4558333456516266, + "rewards/VideoAccuracy/std": 0.4882560968399048, + "step": 465, + "train_speed(iter/s)": 0.018016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 992.0, + "completions/mean_length": 538.4761962890625, + "completions/min_length": 308.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.3828125, + "entropy/min": 0.265625, + "epoch": 0.466, + "grad_norm": 1.1128596821312087, + "kl": 0.298828125, + "learning_rate": 1.1234440255526948e-06, + "loss": 0.0030189414974302053, + "memory(GiB)": 147.17, + "reward": 1.7855602502822876, + "reward_std": 0.2355557084083557, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6041164398193359, + "rewards/EvidenceHallucination/std": 0.44853171706199646, + "rewards/Evidence_Num_Record/mean": 5.1666669845581055, + "rewards/Evidence_Num_Record/std": 2.1630678176879883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6314036250114441, + "rewards/VideoAccuracy/std": 0.40444111824035645, + "step": 466, + "train_speed(iter/s)": 0.018015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 666.0, + "completions/mean_length": 462.952392578125, + "completions/min_length": 314.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.36328125, + "entropy/min": 0.240234375, + "epoch": 0.467, + "grad_norm": 1.1542888484351184, + "kl": 0.322265625, + "learning_rate": 1.1202943544516735e-06, + "loss": 0.003247791901230812, + "memory(GiB)": 147.17, + "reward": 2.08418869972229, + "reward_std": 0.15974998474121094, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6274665594100952, + "rewards/EvidenceHallucination/std": 0.440139502286911, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.0722993612289429, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8586952686309814, + "rewards/VideoAccuracy/std": 0.5073198676109314, + "step": 467, + "train_speed(iter/s)": 0.017993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1546.0, + "completions/mean_length": 541.40478515625, + "completions/min_length": 270.0, + "entropy/max": 0.6875, + "entropy/mean": 0.328125, + "entropy/min": 0.12451171875, + "epoch": 0.468, + "grad_norm": 1.1089900123712406, + "kl": 0.267578125, + "learning_rate": 1.1171434719878383e-06, + "loss": 0.00274701789021492, + "memory(GiB)": 147.17, + "reward": 1.9164142608642578, + "reward_std": 0.12607529759407043, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45135653018951416, + "rewards/EvidenceHallucination/std": 0.38880568742752075, + "rewards/Evidence_Num_Record/mean": 5.285714149475098, + "rewards/Evidence_Num_Record/std": 3.344348907470703, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.692809522151947, + "rewards/VideoAccuracy/std": 0.5157350301742554, + "step": 468, + "train_speed(iter/s)": 0.01799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 952.0, + "completions/mean_length": 554.0714111328125, + "completions/min_length": 402.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.3828125, + "entropy/min": 0.224609375, + "epoch": 0.469, + "grad_norm": 1.2050191357372941, + "kl": 0.287109375, + "learning_rate": 1.1139914098905405e-06, + "loss": 0.0028876017313450575, + "memory(GiB)": 147.17, + "reward": 1.5697916746139526, + "reward_std": 0.3003442883491516, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3716760575771332, + "rewards/EvidenceHallucination/std": 0.4212680459022522, + "rewards/Evidence_Num_Record/mean": 5.642857074737549, + "rewards/Evidence_Num_Record/std": 1.7918709516525269, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.46212318539619446, + "rewards/VideoAccuracy/std": 0.4985504448413849, + "step": 469, + "train_speed(iter/s)": 0.017994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 975.0, + "completions/mean_length": 500.6428527832031, + "completions/min_length": 318.0, + "entropy/max": 0.578125, + "entropy/mean": 0.392578125, + "entropy/min": 0.2021484375, + "epoch": 0.47, + "grad_norm": 1.2681006412044444, + "kl": 0.318359375, + "learning_rate": 1.110838199901011e-06, + "loss": 0.003217041026800871, + "memory(GiB)": 147.17, + "reward": 1.4075568914413452, + "reward_std": 0.23479227721691132, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2022327035665512, + "rewards/EvidenceHallucination/std": 0.39306485652923584, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 2.091407060623169, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500061869621277, + "rewards/VideoAccuracy/mean": 0.3147293031215668, + "rewards/VideoAccuracy/std": 0.4137459993362427, + "step": 470, + "train_speed(iter/s)": 0.018 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/mean_length": 524.5238037109375, + "completions/min_length": 345.0, + "entropy/max": 0.50390625, + "entropy/mean": 0.330078125, + "entropy/min": 0.1484375, + "epoch": 0.471, + "grad_norm": 0.9793066015071863, + "kl": 0.275390625, + "learning_rate": 1.107683873772039e-06, + "loss": 0.0029692240059375763, + "memory(GiB)": 147.17, + "reward": 1.9420217275619507, + "reward_std": 0.08886364102363586, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41147086024284363, + "rewards/EvidenceHallucination/std": 0.4166224002838135, + "rewards/Evidence_Num_Record/mean": 4.547619342803955, + "rewards/Evidence_Num_Record/std": 1.4684051275253296, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.6644895076751709, + "rewards/VideoAccuracy/std": 0.44928592443466187, + "step": 471, + "train_speed(iter/s)": 0.018001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 980.0, + "completions/mean_length": 542.452392578125, + "completions/min_length": 361.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.390625, + "entropy/min": 0.255859375, + "epoch": 0.472, + "grad_norm": 1.1380247355250153, + "kl": 0.291015625, + "learning_rate": 1.1045284632676535e-06, + "loss": 0.00299941748380661, + "memory(GiB)": 147.17, + "reward": 1.785868763923645, + "reward_std": 0.012286361306905746, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5960100889205933, + "rewards/EvidenceHallucination/std": 0.4448285698890686, + "rewards/Evidence_Num_Record/mean": 5.190476417541504, + "rewards/Evidence_Num_Record/std": 1.5654516220092773, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6666666865348816, + "rewards/VideoAccuracy/std": 0.47711876034736633, + "step": 472, + "train_speed(iter/s)": 0.01797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 785.0, + "completions/mean_length": 526.3095092773438, + "completions/min_length": 373.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.421875, + "entropy/min": 0.25, + "epoch": 0.473, + "grad_norm": 1.1189761887851029, + "kl": 0.275390625, + "learning_rate": 1.1013720001628034e-06, + "loss": 0.002974391682073474, + "memory(GiB)": 147.17, + "reward": 1.4630788564682007, + "reward_std": 0.4351825416088104, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.25441774725914, + "rewards/EvidenceHallucination/std": 0.3907027244567871, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 1.311965823173523, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.35029053688049316, + "rewards/VideoAccuracy/std": 0.4316782057285309, + "step": 473, + "train_speed(iter/s)": 0.017975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1089.0, + "completions/mean_length": 532.1190795898438, + "completions/min_length": 335.0, + "entropy/max": 0.48828125, + "entropy/mean": 0.3515625, + "entropy/min": 0.193359375, + "epoch": 0.474, + "grad_norm": 1.2260793976716424, + "kl": 0.302734375, + "learning_rate": 1.0982145162430371e-06, + "loss": 0.0030711570288985968, + "memory(GiB)": 147.17, + "reward": 1.9970827102661133, + "reward_std": 0.22381845116615295, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48039278388023376, + "rewards/EvidenceHallucination/std": 0.4535634219646454, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 1.5970356464385986, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7343373894691467, + "rewards/VideoAccuracy/std": 0.5738323926925659, + "step": 474, + "train_speed(iter/s)": 0.01797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1251.0, + "completions/mean_length": 588.2142944335938, + "completions/min_length": 330.0, + "entropy/max": 1.2109375, + "entropy/mean": 0.3046875, + "entropy/min": 0.1376953125, + "epoch": 0.475, + "grad_norm": 2.7216745852020106, + "kl": 0.52734375, + "learning_rate": 1.0950560433041825e-06, + "loss": 0.0056946794502437115, + "memory(GiB)": 147.17, + "reward": 2.2821545600891113, + "reward_std": 0.06225450336933136, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6848945021629333, + "rewards/EvidenceHallucination/std": 0.26604753732681274, + "rewards/Evidence_Num_Record/mean": 5.857142925262451, + "rewards/Evidence_Num_Record/std": 3.2277109622955322, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 1.0785090923309326, + "rewards/VideoAccuracy/std": 0.12712201476097107, + "step": 475, + "train_speed(iter/s)": 0.017961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 842.0, + "completions/mean_length": 538.1190795898438, + "completions/min_length": 373.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.39453125, + "entropy/min": 0.2197265625, + "epoch": 0.476, + "grad_norm": 1.338200222826453, + "kl": 0.287109375, + "learning_rate": 1.0918966131520276e-06, + "loss": 0.002909306436777115, + "memory(GiB)": 147.17, + "reward": 1.718570351600647, + "reward_std": 0.2557729482650757, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4614545404911041, + "rewards/EvidenceHallucination/std": 0.4442642629146576, + "rewards/Evidence_Num_Record/mean": 4.761904716491699, + "rewards/Evidence_Num_Record/std": 1.1647155284881592, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5643746256828308, + "rewards/VideoAccuracy/std": 0.4282829463481903, + "step": 476, + "train_speed(iter/s)": 0.017959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 627.7619018554688, + "completions/min_length": 367.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.34765625, + "entropy/min": 0.08056640625, + "epoch": 0.477, + "grad_norm": 1.1502767846006685, + "kl": 0.275390625, + "learning_rate": 1.0887362576019981e-06, + "loss": 0.002875441685318947, + "memory(GiB)": 147.17, + "reward": 2.072842836380005, + "reward_std": 0.15147219598293304, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.701572835445404, + "rewards/EvidenceHallucination/std": 0.3567623794078827, + "rewards/Evidence_Num_Record/mean": 5.833333492279053, + "rewards/Evidence_Num_Record/std": 5.3278679847717285, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8444330096244812, + "rewards/VideoAccuracy/std": 0.42393815517425537, + "step": 477, + "train_speed(iter/s)": 0.017935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1164.0, + "completions/mean_length": 547.1904907226562, + "completions/min_length": 305.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.31640625, + "entropy/min": 0.1376953125, + "epoch": 0.478, + "grad_norm": 1.0997246913390766, + "kl": 0.25390625, + "learning_rate": 1.0855750084788398e-06, + "loss": 0.00258713960647583, + "memory(GiB)": 147.17, + "reward": 2.1575942039489746, + "reward_std": 0.19384567439556122, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5576894283294678, + "rewards/EvidenceHallucination/std": 0.4132033884525299, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.8033393621444702, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9127230048179626, + "rewards/VideoAccuracy/std": 0.4475727081298828, + "step": 478, + "train_speed(iter/s)": 0.017937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 573.5238037109375, + "completions/min_length": 382.0, + "entropy/max": 0.75, + "entropy/mean": 0.3828125, + "entropy/min": 0.1015625, + "epoch": 0.479, + "grad_norm": 1.0940228515662154, + "kl": 0.255859375, + "learning_rate": 1.0824128976162962e-06, + "loss": 0.002665138803422451, + "memory(GiB)": 147.17, + "reward": 1.4639655351638794, + "reward_std": 0.19348707795143127, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30238524079322815, + "rewards/EvidenceHallucination/std": 0.40099501609802246, + "rewards/Evidence_Num_Record/mean": 5.404761791229248, + "rewards/Evidence_Num_Record/std": 4.633175849914551, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.38205984234809875, + "rewards/VideoAccuracy/std": 0.49067986011505127, + "step": 479, + "train_speed(iter/s)": 0.017913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 817.0, + "completions/mean_length": 525.6666870117188, + "completions/min_length": 370.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.388671875, + "entropy/min": 0.25, + "epoch": 0.48, + "grad_norm": 1.2701543867108012, + "kl": 0.265625, + "learning_rate": 1.0792499568567884e-06, + "loss": 0.0026680571027100086, + "memory(GiB)": 147.17, + "reward": 1.519570231437683, + "reward_std": 0.22163185477256775, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2485884577035904, + "rewards/EvidenceHallucination/std": 0.42518365383148193, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.6709585189819336, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.40794771909713745, + "rewards/VideoAccuracy/std": 0.508557140827179, + "step": 480, + "train_speed(iter/s)": 0.01792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 813.0, + "completions/mean_length": 513.9761962890625, + "completions/min_length": 369.0, + "entropy/max": 0.609375, + "entropy/mean": 0.31640625, + "entropy/min": 0.12109375, + "epoch": 0.481, + "grad_norm": 1.2578409875339809, + "kl": 0.263671875, + "learning_rate": 1.076086218051095e-06, + "loss": 0.0026590488851070404, + "memory(GiB)": 147.17, + "reward": 2.2940688133239746, + "reward_std": 0.1663028597831726, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6069757342338562, + "rewards/EvidenceHallucination/std": 0.4343172311782837, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 0.9927144050598145, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9774355292320251, + "rewards/VideoAccuracy/std": 0.33955731987953186, + "step": 481, + "train_speed(iter/s)": 0.017919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/mean_length": 618.0238037109375, + "completions/min_length": 317.0, + "entropy/max": 1.34375, + "entropy/mean": 0.455078125, + "entropy/min": 0.251953125, + "epoch": 0.482, + "grad_norm": 0.8808210235719656, + "kl": 0.2275390625, + "learning_rate": 1.0729217130580309e-06, + "loss": 0.002350968774408102, + "memory(GiB)": 147.17, + "reward": 1.387406587600708, + "reward_std": 0.24863937497138977, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27036601305007935, + "rewards/EvidenceHallucination/std": 0.39469394087791443, + "rewards/Evidence_Num_Record/mean": 6.476190567016602, + "rewards/Evidence_Num_Record/std": 4.043893814086914, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3333333432674408, + "rewards/VideoAccuracy/std": 0.47711870074272156, + "step": 482, + "train_speed(iter/s)": 0.017909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1115.0, + "completions/mean_length": 594.8333740234375, + "completions/min_length": 368.0, + "entropy/max": 0.6875, + "entropy/mean": 0.3828125, + "entropy/min": 0.23828125, + "epoch": 0.483, + "grad_norm": 1.115115173966916, + "kl": 0.271484375, + "learning_rate": 1.069756473744125e-06, + "loss": 0.0027435943484306335, + "memory(GiB)": 147.17, + "reward": 1.4424556493759155, + "reward_std": 0.2692454755306244, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2950797379016876, + "rewards/EvidenceHallucination/std": 0.4263550043106079, + "rewards/Evidence_Num_Record/mean": 5.5, + "rewards/Evidence_Num_Record/std": 1.7976950407028198, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.3548682928085327, + "rewards/VideoAccuracy/std": 0.41278472542762756, + "step": 483, + "train_speed(iter/s)": 0.017912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 784.0, + "completions/mean_length": 480.6428527832031, + "completions/min_length": 338.0, + "entropy/max": 0.5, + "entropy/mean": 0.341796875, + "entropy/min": 0.1630859375, + "epoch": 0.484, + "grad_norm": 1.2047242245282004, + "kl": 0.302734375, + "learning_rate": 1.066590531983304e-06, + "loss": 0.0030420708935707808, + "memory(GiB)": 147.17, + "reward": 2.144254207611084, + "reward_std": 0.12368159741163254, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5195605754852295, + "rewards/EvidenceHallucination/std": 0.38605642318725586, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.2176117897033691, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.8784372210502625, + "rewards/VideoAccuracy/std": 0.46715590357780457, + "step": 484, + "train_speed(iter/s)": 0.017913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/mean_length": 587.7619018554688, + "completions/min_length": 310.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.357421875, + "entropy/min": 0.150390625, + "epoch": 0.485, + "grad_norm": 1.0202696321221674, + "kl": 0.2275390625, + "learning_rate": 1.0634239196565644e-06, + "loss": 0.00234436197206378, + "memory(GiB)": 147.17, + "reward": 1.95667564868927, + "reward_std": 0.18724535405635834, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.520356297492981, + "rewards/EvidenceHallucination/std": 0.39924415946006775, + "rewards/Evidence_Num_Record/mean": 5.761904716491699, + "rewards/Evidence_Num_Record/std": 2.8353991508483887, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7859376668930054, + "rewards/VideoAccuracy/std": 0.4797806143760681, + "step": 485, + "train_speed(iter/s)": 0.017905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1612.0, + "completions/mean_length": 582.547607421875, + "completions/min_length": 320.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.359375, + "entropy/min": 0.158203125, + "epoch": 0.486, + "grad_norm": 1.2034500202061464, + "kl": 0.2451171875, + "learning_rate": 1.0602566686516584e-06, + "loss": 0.0025604632683098316, + "memory(GiB)": 147.17, + "reward": 1.624455451965332, + "reward_std": 0.11654912680387497, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3282482922077179, + "rewards/EvidenceHallucination/std": 0.3944683372974396, + "rewards/Evidence_Num_Record/mean": 5.1666669845581055, + "rewards/Evidence_Num_Record/std": 2.713261604309082, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5254723429679871, + "rewards/VideoAccuracy/std": 0.4686095714569092, + "step": 486, + "train_speed(iter/s)": 0.017893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 610.1904907226562, + "completions/min_length": 335.0, + "entropy/max": 0.7265625, + "entropy/mean": 0.390625, + "entropy/min": 0.16015625, + "epoch": 0.487, + "grad_norm": 1.1250653949961946, + "kl": 0.265625, + "learning_rate": 1.057088810862768e-06, + "loss": 0.0027342557441443205, + "memory(GiB)": 147.17, + "reward": 1.7513415813446045, + "reward_std": 0.28683042526245117, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4083775281906128, + "rewards/EvidenceHallucination/std": 0.4645242989063263, + "rewards/Evidence_Num_Record/mean": 6.142857074737549, + "rewards/Evidence_Num_Record/std": 5.973810195922852, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.5815707445144653, + "rewards/VideoAccuracy/std": 0.6242651343345642, + "step": 487, + "train_speed(iter/s)": 0.017884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1996.0, + "completions/mean_length": 640.5714111328125, + "completions/min_length": 379.0, + "entropy/max": 0.5, + "entropy/mean": 0.302734375, + "entropy/min": 0.1669921875, + "epoch": 0.488, + "grad_norm": 1.0456907944976346, + "kl": 0.2333984375, + "learning_rate": 1.053920378190186e-06, + "loss": 0.0024078013375401497, + "memory(GiB)": 147.17, + "reward": 1.766273856163025, + "reward_std": 0.28851383924484253, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.320260226726532, + "rewards/EvidenceHallucination/std": 0.40128740668296814, + "rewards/Evidence_Num_Record/mean": 6.214285850524902, + "rewards/Evidence_Num_Record/std": 5.224406719207764, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5688884854316711, + "rewards/VideoAccuracy/std": 0.4637153446674347, + "step": 488, + "train_speed(iter/s)": 0.017875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2289.0, + "completions/mean_length": 630.0714111328125, + "completions/min_length": 373.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.392578125, + "entropy/min": 0.1455078125, + "epoch": 0.489, + "grad_norm": 1.1567690080526456, + "kl": 0.2265625, + "learning_rate": 1.0507514025399942e-06, + "loss": 0.002367014531046152, + "memory(GiB)": 147.17, + "reward": 1.8700870275497437, + "reward_std": 0.1770918369293213, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5409119129180908, + "rewards/EvidenceHallucination/std": 0.35729101300239563, + "rewards/Evidence_Num_Record/mean": 5.404761791229248, + "rewards/Evidence_Num_Record/std": 2.164677858352661, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.761904776096344, + "rewards/VideoAccuracy/std": 0.43108054995536804, + "step": 489, + "train_speed(iter/s)": 0.017858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 988.0, + "completions/mean_length": 495.4285888671875, + "completions/min_length": 325.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.3984375, + "entropy/min": 0.19921875, + "epoch": 0.49, + "grad_norm": 1.065173367019639, + "kl": 0.279296875, + "learning_rate": 1.0475819158237424e-06, + "loss": 0.0028036325238645077, + "memory(GiB)": 147.17, + "reward": 1.3710325956344604, + "reward_std": 0.24134787917137146, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23277650773525238, + "rewards/EvidenceHallucination/std": 0.3827642798423767, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.7708439826965332, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.2578105926513672, + "rewards/VideoAccuracy/std": 0.31216996908187866, + "step": 490, + "train_speed(iter/s)": 0.017838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 639.0, + "completions/mean_length": 474.0952453613281, + "completions/min_length": 295.0, + "entropy/max": 0.5078125, + "entropy/mean": 0.318359375, + "entropy/min": 0.12890625, + "epoch": 0.491, + "grad_norm": 1.0706580417180322, + "kl": 0.26953125, + "learning_rate": 1.0444119499581261e-06, + "loss": 0.0027219271287322044, + "memory(GiB)": 147.17, + "reward": 2.362217426300049, + "reward_std": 0.09849338233470917, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5614845156669617, + "rewards/EvidenceHallucination/std": 0.41747772693634033, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 0.9830148816108704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0499204397201538, + "rewards/VideoAccuracy/std": 0.36109185218811035, + "step": 491, + "train_speed(iter/s)": 0.017842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2059.0, + "completions/mean_length": 746.5238037109375, + "completions/min_length": 429.0, + "entropy/max": 1.5859375, + "entropy/mean": 0.41015625, + "entropy/min": 0.1630859375, + "epoch": 0.492, + "grad_norm": 0.931213143371818, + "kl": 0.21875, + "learning_rate": 1.041241536864667e-06, + "loss": 0.002338796854019165, + "memory(GiB)": 147.17, + "reward": 1.6701347827911377, + "reward_std": 0.11329877376556396, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4935309588909149, + "rewards/EvidenceHallucination/std": 0.4419654309749603, + "rewards/Evidence_Num_Record/mean": 7.904762268066406, + "rewards/Evidence_Num_Record/std": 4.0713725090026855, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5714285969734192, + "rewards/VideoAccuracy/std": 0.5008703470230103, + "step": 492, + "train_speed(iter/s)": 0.01782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 850.0, + "completions/mean_length": 518.2619018554688, + "completions/min_length": 362.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.408203125, + "entropy/min": 0.224609375, + "epoch": 0.493, + "grad_norm": 1.178755812522848, + "kl": 0.27734375, + "learning_rate": 1.03807070846939e-06, + "loss": 0.002808667253702879, + "memory(GiB)": 147.17, + "reward": 1.5074771642684937, + "reward_std": 0.15796434879302979, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2707264721393585, + "rewards/EvidenceHallucination/std": 0.405273973941803, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.4256649017333984, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.41999852657318115, + "rewards/VideoAccuracy/std": 0.4554305672645569, + "step": 493, + "train_speed(iter/s)": 0.017798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1074.0, + "completions/mean_length": 543.0714111328125, + "completions/min_length": 286.0, + "entropy/max": 0.515625, + "entropy/mean": 0.32421875, + "entropy/min": 0.1435546875, + "epoch": 0.494, + "grad_norm": 1.0798772311424434, + "kl": 0.279296875, + "learning_rate": 1.034899496702501e-06, + "loss": 0.0028433247935026884, + "memory(GiB)": 147.17, + "reward": 2.1295299530029297, + "reward_std": 0.2256758213043213, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4543311297893524, + "rewards/EvidenceHallucination/std": 0.46849706768989563, + "rewards/Evidence_Num_Record/mean": 4.857142925262451, + "rewards/Evidence_Num_Record/std": 1.6315699815750122, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8719971179962158, + "rewards/VideoAccuracy/std": 0.5661816596984863, + "step": 494, + "train_speed(iter/s)": 0.017803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1678.0, + "completions/mean_length": 596.5238037109375, + "completions/min_length": 270.0, + "entropy/max": 2.265625, + "entropy/mean": 0.447265625, + "entropy/min": 0.1435546875, + "epoch": 0.495, + "grad_norm": 1.0783150340497494, + "kl": 0.24609375, + "learning_rate": 1.0317279334980677e-06, + "loss": 0.0025517649482935667, + "memory(GiB)": 147.17, + "reward": 1.7263377904891968, + "reward_std": 0.16748467087745667, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3050219416618347, + "rewards/EvidenceHallucination/std": 0.38823720812797546, + "rewards/Evidence_Num_Record/mean": 5.928571701049805, + "rewards/Evidence_Num_Record/std": 3.4738948345184326, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.5653334856033325, + "rewards/VideoAccuracy/std": 0.4182286858558655, + "step": 495, + "train_speed(iter/s)": 0.017799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1138.0, + "completions/mean_length": 542.7857055664062, + "completions/min_length": 384.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.365234375, + "entropy/min": 0.2275390625, + "epoch": 0.496, + "grad_norm": 1.1036716393029964, + "kl": 0.259765625, + "learning_rate": 1.0285560507936961e-06, + "loss": 0.0026476779021322727, + "memory(GiB)": 147.17, + "reward": 1.4483938217163086, + "reward_std": 0.36713075637817383, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2876743972301483, + "rewards/EvidenceHallucination/std": 0.43775710463523865, + "rewards/Evidence_Num_Record/mean": 5.285714149475098, + "rewards/Evidence_Num_Record/std": 2.3919143676757812, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3575255572795868, + "rewards/VideoAccuracy/std": 0.4473797082901001, + "step": 496, + "train_speed(iter/s)": 0.01779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1597.0, + "completions/mean_length": 579.0, + "completions/min_length": 345.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.34375, + "entropy/min": 0.13671875, + "epoch": 0.497, + "grad_norm": 1.179711608738581, + "kl": 0.275390625, + "learning_rate": 1.0253838805302104e-06, + "loss": 0.0028095985762774944, + "memory(GiB)": 147.17, + "reward": 2.179518699645996, + "reward_std": 0.2409399449825287, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6658817529678345, + "rewards/EvidenceHallucination/std": 0.39015546441078186, + "rewards/Evidence_Num_Record/mean": 5.38095235824585, + "rewards/Evidence_Num_Record/std": 3.1541872024536133, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.9463424682617188, + "rewards/VideoAccuracy/std": 0.3125475347042084, + "step": 497, + "train_speed(iter/s)": 0.017781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1561.0, + "completions/mean_length": 542.8809814453125, + "completions/min_length": 307.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.306640625, + "entropy/min": 0.1513671875, + "epoch": 0.498, + "grad_norm": 0.9690807003077019, + "kl": 0.25, + "learning_rate": 1.0222114546513293e-06, + "loss": 0.002958999713882804, + "memory(GiB)": 147.17, + "reward": 2.139566659927368, + "reward_std": 0.2018376588821411, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.47256433963775635, + "rewards/EvidenceHallucination/std": 0.4096042811870575, + "rewards/Evidence_Num_Record/mean": 5.357142925262451, + "rewards/Evidence_Num_Record/std": 4.071443557739258, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9117205739021301, + "rewards/VideoAccuracy/std": 0.3327580392360687, + "step": 498, + "train_speed(iter/s)": 0.017779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.047619047619047616, + "completions/max_length": 2625.0, + "completions/mean_length": 669.0, + "completions/min_length": 417.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.3203125, + "entropy/min": 0.08935546875, + "epoch": 0.499, + "grad_norm": 0.9967579922646225, + "kl": 0.2294921875, + "learning_rate": 1.0190388051033464e-06, + "loss": 0.0024905321188271046, + "memory(GiB)": 147.17, + "reward": 1.6246230602264404, + "reward_std": 0.24624596536159515, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37271884083747864, + "rewards/EvidenceHallucination/std": 0.3991992175579071, + "rewards/Evidence_Num_Record/mean": 5.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.7962408065795898, + "rewards/Format/mean": 0.9523809552192688, + "rewards/Format/std": 0.21554027497768402, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5405555963516235, + "rewards/VideoAccuracy/std": 0.47258710861206055, + "step": 499, + "train_speed(iter/s)": 0.017751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2351.0, + "completions/mean_length": 505.3095397949219, + "completions/min_length": 257.0, + "entropy/max": 0.609375, + "entropy/mean": 0.390625, + "entropy/min": 0.08447265625, + "epoch": 0.5, + "grad_norm": 1.2400215598471565, + "kl": 0.294921875, + "learning_rate": 1.015865963834808e-06, + "loss": 0.003047055331990123, + "memory(GiB)": 147.17, + "reward": 1.3536604642868042, + "reward_std": 0.3210704028606415, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.12089216709136963, + "rewards/EvidenceHallucination/std": 0.2773915231227875, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 1.8543041944503784, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.2628153860569, + "rewards/VideoAccuracy/std": 0.3835754096508026, + "step": 500, + "train_speed(iter/s)": 0.017751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 760.0, + "completions/mean_length": 479.5476379394531, + "completions/min_length": 295.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.345703125, + "entropy/min": 0.08544921875, + "epoch": 0.501, + "grad_norm": 1.0692538501971287, + "kl": 0.259765625, + "learning_rate": 1.0126929627961895e-06, + "loss": 0.0026359502226114273, + "memory(GiB)": 147.17, + "reward": 1.9519094228744507, + "reward_std": 0.2122296541929245, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33205512166023254, + "rewards/EvidenceHallucination/std": 0.4195026159286499, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 1.5537225008010864, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6854982972145081, + "rewards/VideoAccuracy/std": 0.4974336624145508, + "step": 501, + "train_speed(iter/s)": 0.017725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2112.0, + "completions/mean_length": 731.7857055664062, + "completions/min_length": 376.0, + "entropy/max": 1.390625, + "entropy/mean": 0.373046875, + "entropy/min": 0.123046875, + "epoch": 0.502, + "grad_norm": 0.930144520848011, + "kl": 0.19921875, + "learning_rate": 1.0095198339395767e-06, + "loss": 0.0021102442406117916, + "memory(GiB)": 147.17, + "reward": 1.5032862424850464, + "reward_std": 0.25732943415641785, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.274224191904068, + "rewards/EvidenceHallucination/std": 0.38205230236053467, + "rewards/Evidence_Num_Record/mean": 7.642857074737549, + "rewards/Evidence_Num_Record/std": 5.728923320770264, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.41510799527168274, + "rewards/VideoAccuracy/std": 0.4720475673675537, + "step": 502, + "train_speed(iter/s)": 0.017707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1054.0, + "completions/mean_length": 549.0952758789062, + "completions/min_length": 378.0, + "entropy/max": 0.8203125, + "entropy/mean": 0.412109375, + "entropy/min": 0.16796875, + "epoch": 0.503, + "grad_norm": 1.1059638862010952, + "kl": 0.263671875, + "learning_rate": 1.006346609218342e-06, + "loss": 0.002717230934649706, + "memory(GiB)": 147.17, + "reward": 1.4658093452453613, + "reward_std": 0.3106657564640045, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30617669224739075, + "rewards/EvidenceHallucination/std": 0.39000555872917175, + "rewards/Evidence_Num_Record/mean": 5.6666669845581055, + "rewards/Evidence_Num_Record/std": 2.3957958221435547, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.37124067544937134, + "rewards/VideoAccuracy/std": 0.43431395292282104, + "step": 503, + "train_speed(iter/s)": 0.01771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 621.0, + "completions/mean_length": 459.3333435058594, + "completions/min_length": 291.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.337890625, + "entropy/min": 0.1513671875, + "epoch": 0.504, + "grad_norm": 1.2113299240988997, + "kl": 0.302734375, + "learning_rate": 1.0031733205868223e-06, + "loss": 0.0030504553578794003, + "memory(GiB)": 147.17, + "reward": 1.9867899417877197, + "reward_std": 0.23987329006195068, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5109496712684631, + "rewards/EvidenceHallucination/std": 0.45937153697013855, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.8665281534194946, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.717933177947998, + "rewards/VideoAccuracy/std": 0.6398183107376099, + "step": 504, + "train_speed(iter/s)": 0.017714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1561.0, + "completions/mean_length": 556.7142944335938, + "completions/min_length": 282.0, + "entropy/max": 0.671875, + "entropy/mean": 0.302734375, + "entropy/min": 0.140625, + "epoch": 0.505, + "grad_norm": 1.1487524994760716, + "kl": 0.2294921875, + "learning_rate": 1e-06, + "loss": 0.0023781340569257736, + "memory(GiB)": 147.17, + "reward": 1.9254145622253418, + "reward_std": 0.23551908135414124, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42413070797920227, + "rewards/EvidenceHallucination/std": 0.4278799891471863, + "rewards/Evidence_Num_Record/mean": 5.976190567016602, + "rewards/Evidence_Num_Record/std": 4.386721134185791, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.740588366985321, + "rewards/VideoAccuracy/std": 0.3732606768608093, + "step": 505, + "train_speed(iter/s)": 0.017705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2116.0, + "completions/mean_length": 698.1190795898438, + "completions/min_length": 339.0, + "entropy/max": 0.75, + "entropy/mean": 0.3515625, + "entropy/min": 0.119140625, + "epoch": 0.506, + "grad_norm": 1.02628192816372, + "kl": 0.2373046875, + "learning_rate": 9.968266794131776e-07, + "loss": 0.002447321079671383, + "memory(GiB)": 147.17, + "reward": 1.527622938156128, + "reward_std": 0.1635744571685791, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.17251500487327576, + "rewards/EvidenceHallucination/std": 0.32047125697135925, + "rewards/Evidence_Num_Record/mean": 6.976190567016602, + "rewards/Evidence_Num_Record/std": 6.387824058532715, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.45978668332099915, + "rewards/VideoAccuracy/std": 0.4627102017402649, + "step": 506, + "train_speed(iter/s)": 0.017683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1757.0, + "completions/mean_length": 569.9285888671875, + "completions/min_length": 358.0, + "entropy/max": 1.0703125, + "entropy/mean": 0.37109375, + "entropy/min": 0.1279296875, + "epoch": 0.507, + "grad_norm": 1.0982760174066724, + "kl": 0.267578125, + "learning_rate": 9.936533907816581e-07, + "loss": 0.0027626955416053534, + "memory(GiB)": 147.17, + "reward": 1.7680379152297974, + "reward_std": 0.3319416642189026, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3042178452014923, + "rewards/EvidenceHallucination/std": 0.4273426830768585, + "rewards/Evidence_Num_Record/mean": 5.595238208770752, + "rewards/Evidence_Num_Record/std": 4.219926834106445, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6071943044662476, + "rewards/VideoAccuracy/std": 0.5861407518386841, + "step": 507, + "train_speed(iter/s)": 0.017685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/mean_length": 548.3333740234375, + "completions/min_length": 297.0, + "entropy/max": 0.9453125, + "entropy/mean": 0.275390625, + "entropy/min": 0.1142578125, + "epoch": 0.508, + "grad_norm": 1.0287272845778204, + "kl": 0.2158203125, + "learning_rate": 9.904801660604232e-07, + "loss": 0.002213830128312111, + "memory(GiB)": 147.19, + "reward": 2.122735023498535, + "reward_std": 0.17406874895095825, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5652883052825928, + "rewards/EvidenceHallucination/std": 0.3997299373149872, + "rewards/Evidence_Num_Record/mean": 4.904761791229248, + "rewards/Evidence_Num_Record/std": 2.00984787940979, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5952380895614624, + "rewards/HonestTime/std": 0.49679574370384216, + "rewards/VideoAccuracy/mean": 0.890629768371582, + "rewards/VideoAccuracy/std": 0.47267624735832214, + "step": 508, + "train_speed(iter/s)": 0.017689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 978.0, + "completions/mean_length": 532.9761962890625, + "completions/min_length": 339.0, + "entropy/max": 2.03125, + "entropy/mean": 0.435546875, + "entropy/min": 0.248046875, + "epoch": 0.509, + "grad_norm": 0.9878512105595498, + "kl": 0.2392578125, + "learning_rate": 9.873070372038104e-07, + "loss": 0.002451932290568948, + "memory(GiB)": 147.2, + "reward": 1.5864818096160889, + "reward_std": 0.10923613607883453, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3566635251045227, + "rewards/EvidenceHallucination/std": 0.4345127046108246, + "rewards/Evidence_Num_Record/mean": 5.047619342803955, + "rewards/Evidence_Num_Record/std": 1.9871830940246582, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.48181581497192383, + "rewards/VideoAccuracy/std": 0.4758481979370117, + "step": 509, + "train_speed(iter/s)": 0.017663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 736.0, + "completions/mean_length": 496.9761962890625, + "completions/min_length": 324.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.380859375, + "entropy/min": 0.263671875, + "epoch": 0.51, + "grad_norm": 1.0558917088281818, + "kl": 0.275390625, + "learning_rate": 9.84134036165192e-07, + "loss": 0.0027638720348477364, + "memory(GiB)": 147.2, + "reward": 1.5433754920959473, + "reward_std": 0.17969533801078796, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31144264340400696, + "rewards/EvidenceHallucination/std": 0.4260638356208801, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.2308934926986694, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4144202768802643, + "rewards/VideoAccuracy/std": 0.44290584325790405, + "step": 510, + "train_speed(iter/s)": 0.017667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2512.0, + "completions/mean_length": 530.357177734375, + "completions/min_length": 280.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.2451171875, + "entropy/min": 0.072265625, + "epoch": 0.511, + "grad_norm": 0.9613440294327296, + "kl": 0.2265625, + "learning_rate": 9.809611948966533e-07, + "loss": 0.0024141266476362944, + "memory(GiB)": 147.2, + "reward": 2.5995254516601562, + "reward_std": 0.0887753814458847, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6758074760437012, + "rewards/EvidenceHallucination/std": 0.34982067346572876, + "rewards/Evidence_Num_Record/mean": 4.88095235824585, + "rewards/Evidence_Num_Record/std": 4.139060974121094, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.2643641233444214, + "rewards/VideoAccuracy/std": 0.19649749994277954, + "step": 511, + "train_speed(iter/s)": 0.017652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1766.0, + "completions/mean_length": 693.7142944335938, + "completions/min_length": 451.0, + "entropy/max": 0.98046875, + "entropy/mean": 0.392578125, + "entropy/min": 0.19921875, + "epoch": 0.512, + "grad_norm": 1.0210448250392559, + "kl": 0.2177734375, + "learning_rate": 9.777885453486706e-07, + "loss": 0.00225750170648098, + "memory(GiB)": 147.2, + "reward": 1.4917278289794922, + "reward_std": 0.17943572998046875, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2786943018436432, + "rewards/EvidenceHallucination/std": 0.42361971735954285, + "rewards/Evidence_Num_Record/mean": 6.904762268066406, + "rewards/Evidence_Num_Record/std": 4.0713725090026855, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.4026556611061096, + "rewards/VideoAccuracy/std": 0.4588158428668976, + "step": 512, + "train_speed(iter/s)": 0.017649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1935.0, + "completions/mean_length": 558.5238037109375, + "completions/min_length": 275.0, + "entropy/max": 0.546875, + "entropy/mean": 0.380859375, + "entropy/min": 0.1240234375, + "epoch": 0.513, + "grad_norm": 0.9943325329837618, + "kl": 0.2353515625, + "learning_rate": 9.746161194697893e-07, + "loss": 0.0024553914554417133, + "memory(GiB)": 147.2, + "reward": 1.500482201576233, + "reward_std": 0.2269335240125656, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430334210395813, + "rewards/EvidenceHallucination/mean": 0.2552522122859955, + "rewards/EvidenceHallucination/std": 0.39110541343688965, + "rewards/Evidence_Num_Record/mean": 5.238095283508301, + "rewards/Evidence_Num_Record/std": 4.004932880401611, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.428003191947937, + "rewards/VideoAccuracy/std": 0.4602835178375244, + "step": 513, + "train_speed(iter/s)": 0.017624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/mean_length": 525.5, + "completions/min_length": 359.0, + "entropy/max": 0.515625, + "entropy/mean": 0.3125, + "entropy/min": 0.2001953125, + "epoch": 0.514, + "grad_norm": 1.175745174405539, + "kl": 0.267578125, + "learning_rate": 9.714439492063038e-07, + "loss": 0.002724867779761553, + "memory(GiB)": 147.2, + "reward": 2.0888025760650635, + "reward_std": 0.16696274280548096, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4821758270263672, + "rewards/EvidenceHallucination/std": 0.4331739544868469, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.9006997346878052, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8257007002830505, + "rewards/VideoAccuracy/std": 0.4494706392288208, + "step": 514, + "train_speed(iter/s)": 0.017628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1148.0, + "completions/mean_length": 579.452392578125, + "completions/min_length": 332.0, + "entropy/max": 0.6875, + "entropy/mean": 0.314453125, + "entropy/min": 0.1650390625, + "epoch": 0.515, + "grad_norm": 1.2232727483994883, + "kl": 0.2265625, + "learning_rate": 9.682720665019325e-07, + "loss": 0.002336142584681511, + "memory(GiB)": 147.2, + "reward": 1.8602702617645264, + "reward_std": 0.3273540437221527, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3450998067855835, + "rewards/EvidenceHallucination/std": 0.3622949421405792, + "rewards/Evidence_Num_Record/mean": 5.714285850524902, + "rewards/Evidence_Num_Record/std": 3.535780429840088, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.7007741332054138, + "rewards/VideoAccuracy/std": 0.39636293053627014, + "step": 515, + "train_speed(iter/s)": 0.017627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/mean_length": 567.6190795898438, + "completions/min_length": 331.0, + "entropy/max": 0.7421875, + "entropy/mean": 0.39453125, + "entropy/min": 0.1875, + "epoch": 0.516, + "grad_norm": 1.0995164355839078, + "kl": 0.23046875, + "learning_rate": 9.651005032974993e-07, + "loss": 0.002389857079833746, + "memory(GiB)": 147.2, + "reward": 1.4132567644119263, + "reward_std": 0.21354961395263672, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22768878936767578, + "rewards/EvidenceHallucination/std": 0.3905121982097626, + "rewards/Evidence_Num_Record/mean": 6.0714287757873535, + "rewards/Evidence_Num_Record/std": 4.37505578994751, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.3391473889350891, + "rewards/VideoAccuracy/std": 0.4415587782859802, + "step": 516, + "train_speed(iter/s)": 0.0176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1162.0, + "completions/mean_length": 599.0238037109375, + "completions/min_length": 379.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.345703125, + "entropy/min": 0.1552734375, + "epoch": 0.517, + "grad_norm": 0.9684381587992229, + "kl": 0.259765625, + "learning_rate": 9.619292915306101e-07, + "loss": 0.0026290405075997114, + "memory(GiB)": 147.2, + "reward": 1.848392367362976, + "reward_std": 0.20444540679454803, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5357811450958252, + "rewards/EvidenceHallucination/std": 0.40236812829971313, + "rewards/Evidence_Num_Record/mean": 5.523809432983398, + "rewards/Evidence_Num_Record/std": 2.1890869140625, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008703470230103, + "rewards/VideoAccuracy/mean": 0.6555217504501343, + "rewards/VideoAccuracy/std": 0.42811861634254456, + "step": 517, + "train_speed(iter/s)": 0.017596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/mean_length": 570.3095092773438, + "completions/min_length": 362.0, + "entropy/max": 2.328125, + "entropy/mean": 0.36328125, + "entropy/min": 0.1572265625, + "epoch": 0.518, + "grad_norm": 1.0215522190583068, + "kl": 0.21875, + "learning_rate": 9.587584631353328e-07, + "loss": 0.0022501437924802303, + "memory(GiB)": 147.2, + "reward": 2.1257436275482178, + "reward_std": 0.17595742642879486, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3400658369064331, + "rewards/EvidenceHallucination/std": 0.4058413505554199, + "rewards/Evidence_Num_Record/mean": 5.285714149475098, + "rewards/Evidence_Num_Record/std": 2.7075836658477783, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.9291589856147766, + "rewards/VideoAccuracy/std": 0.29189977049827576, + "step": 518, + "train_speed(iter/s)": 0.017591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1267.0, + "completions/mean_length": 593.547607421875, + "completions/min_length": 334.0, + "entropy/max": 0.609375, + "entropy/mean": 0.330078125, + "entropy/min": 0.171875, + "epoch": 0.519, + "grad_norm": 1.0948985117161494, + "kl": 0.2314453125, + "learning_rate": 9.555880500418738e-07, + "loss": 0.0023919104132801294, + "memory(GiB)": 147.2, + "reward": 1.7303181886672974, + "reward_std": 0.1799202859401703, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46469253301620483, + "rewards/EvidenceHallucination/std": 0.47335711121559143, + "rewards/Evidence_Num_Record/mean": 5.238095283508301, + "rewards/Evidence_Num_Record/std": 2.602152109146118, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5754749178886414, + "rewards/VideoAccuracy/std": 0.45630866289138794, + "step": 519, + "train_speed(iter/s)": 0.017581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 911.0, + "completions/mean_length": 551.857177734375, + "completions/min_length": 362.0, + "entropy/max": 0.4921875, + "entropy/mean": 0.3671875, + "entropy/min": 0.2412109375, + "epoch": 0.52, + "grad_norm": 1.1137900116256731, + "kl": 0.255859375, + "learning_rate": 9.524180841762576e-07, + "loss": 0.0026009499561041594, + "memory(GiB)": 147.2, + "reward": 1.5844050645828247, + "reward_std": 0.24579095840454102, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.298897922039032, + "rewards/EvidenceHallucination/std": 0.41049379110336304, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 1.627293348312378, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.46272075176239014, + "rewards/VideoAccuracy/std": 0.48835301399230957, + "step": 520, + "train_speed(iter/s)": 0.017582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/mean_length": 444.952392578125, + "completions/min_length": 275.0, + "entropy/max": 0.5, + "entropy/mean": 0.298828125, + "entropy/min": 0.15234375, + "epoch": 0.521, + "grad_norm": 1.1618956968259504, + "kl": 0.2412109375, + "learning_rate": 9.492485974600059e-07, + "loss": 0.0024179406464099884, + "memory(GiB)": 147.2, + "reward": 2.2382819652557373, + "reward_std": 0.11290633678436279, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5738786458969116, + "rewards/EvidenceHallucination/std": 0.42459413409233093, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.8125753998756409, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9235062599182129, + "rewards/VideoAccuracy/std": 0.4135523736476898, + "step": 521, + "train_speed(iter/s)": 0.017587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 743.5952758789062, + "completions/min_length": 391.0, + "entropy/max": 2.203125, + "entropy/mean": 0.4375, + "entropy/min": 0.1083984375, + "epoch": 0.522, + "grad_norm": 1.0285302054727505, + "kl": 0.2060546875, + "learning_rate": 9.460796218098141e-07, + "loss": 0.002161647193133831, + "memory(GiB)": 147.2, + "reward": 1.5938557386398315, + "reward_std": 0.14812681078910828, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42208966612815857, + "rewards/EvidenceHallucination/std": 0.4313488006591797, + "rewards/Evidence_Num_Record/mean": 6.833333492279053, + "rewards/Evidence_Num_Record/std": 4.637685775756836, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430335700511932, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.4880092740058899, + "rewards/VideoAccuracy/std": 0.48101186752319336, + "step": 522, + "train_speed(iter/s)": 0.017569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 837.0, + "completions/mean_length": 577.8095092773438, + "completions/min_length": 388.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.35546875, + "entropy/min": 0.20703125, + "epoch": 0.523, + "grad_norm": 1.1973145091114252, + "kl": 0.2294921875, + "learning_rate": 9.429111891372319e-07, + "loss": 0.002317069796845317, + "memory(GiB)": 147.2, + "reward": 1.5629115104675293, + "reward_std": 0.2889779508113861, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3631332814693451, + "rewards/EvidenceHallucination/std": 0.4347761571407318, + "rewards/Evidence_Num_Record/mean": 5.38095235824585, + "rewards/Evidence_Num_Record/std": 2.0947365760803223, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4236181974411011, + "rewards/VideoAccuracy/std": 0.4120829105377197, + "step": 523, + "train_speed(iter/s)": 0.017567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 791.0, + "completions/mean_length": 549.4285888671875, + "completions/min_length": 361.0, + "entropy/max": 0.455078125, + "entropy/mean": 0.322265625, + "entropy/min": 0.1708984375, + "epoch": 0.524, + "grad_norm": 1.0442632944342252, + "kl": 0.255859375, + "learning_rate": 9.397433313483416e-07, + "loss": 0.0025761870201677084, + "memory(GiB)": 147.2, + "reward": 2.4195916652679443, + "reward_std": 0.21475231647491455, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6845204830169678, + "rewards/EvidenceHallucination/std": 0.37251976132392883, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.1466256380081177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 1.120782732963562, + "rewards/VideoAccuracy/std": 0.5933963060379028, + "step": 524, + "train_speed(iter/s)": 0.017571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1039.0, + "completions/mean_length": 559.4761962890625, + "completions/min_length": 319.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.36328125, + "entropy/min": 0.1337890625, + "epoch": 0.525, + "grad_norm": 1.1559230240005511, + "kl": 0.2294921875, + "learning_rate": 9.365760803434354e-07, + "loss": 0.002364410785958171, + "memory(GiB)": 147.2, + "reward": 1.8699347972869873, + "reward_std": 0.2940487563610077, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3227474093437195, + "rewards/EvidenceHallucination/std": 0.3658566474914551, + "rewards/Evidence_Num_Record/mean": 5.047619342803955, + "rewards/Evidence_Num_Record/std": 2.398702621459961, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.7101471424102783, + "rewards/VideoAccuracy/std": 0.522103488445282, + "step": 525, + "train_speed(iter/s)": 0.017568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 974.0, + "completions/mean_length": 529.5952758789062, + "completions/min_length": 375.0, + "entropy/max": 0.53125, + "entropy/mean": 0.33984375, + "entropy/min": 0.201171875, + "epoch": 0.526, + "grad_norm": 1.2709692704094622, + "kl": 0.263671875, + "learning_rate": 9.33409468016696e-07, + "loss": 0.0026475153863430023, + "memory(GiB)": 147.2, + "reward": 1.5840200185775757, + "reward_std": 0.40640494227409363, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33685699105262756, + "rewards/EvidenceHallucination/std": 0.4412052631378174, + "rewards/Evidence_Num_Record/mean": 5.095238208770752, + "rewards/Evidence_Num_Record/std": 1.7080801725387573, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4499819576740265, + "rewards/VideoAccuracy/std": 0.42338553071022034, + "step": 526, + "train_speed(iter/s)": 0.017559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 900.0, + "completions/mean_length": 569.547607421875, + "completions/min_length": 411.0, + "entropy/max": 0.546875, + "entropy/mean": 0.37109375, + "entropy/min": 0.216796875, + "epoch": 0.527, + "grad_norm": 0.9895986600311217, + "kl": 0.255859375, + "learning_rate": 9.302435262558747e-07, + "loss": 0.0026058589573949575, + "memory(GiB)": 147.2, + "reward": 1.4728721380233765, + "reward_std": 0.16476371884346008, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21637868881225586, + "rewards/EvidenceHallucination/std": 0.3982798755168915, + "rewards/Evidence_Num_Record/mean": 5.428571701049805, + "rewards/Evidence_Num_Record/std": 1.6400901079177856, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.33912017941474915, + "rewards/VideoAccuracy/std": 0.46005764603614807, + "step": 527, + "train_speed(iter/s)": 0.017555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 797.0, + "completions/mean_length": 532.0714111328125, + "completions/min_length": 312.0, + "entropy/max": 1.46875, + "entropy/mean": 0.328125, + "entropy/min": 0.150390625, + "epoch": 0.528, + "grad_norm": 0.8471361905347333, + "kl": 0.2314453125, + "learning_rate": 9.270782869419693e-07, + "loss": 0.002341092098504305, + "memory(GiB)": 147.2, + "reward": 1.8492215871810913, + "reward_std": 0.05589429661631584, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20401684939861298, + "rewards/EvidenceHallucination/std": 0.35366642475128174, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.1906039714813232, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6750848889350891, + "rewards/VideoAccuracy/std": 0.5081431269645691, + "step": 528, + "train_speed(iter/s)": 0.017557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 987.0, + "completions/mean_length": 577.2380981445312, + "completions/min_length": 364.0, + "entropy/max": 0.9375, + "entropy/mean": 0.396484375, + "entropy/min": 0.2177734375, + "epoch": 0.529, + "grad_norm": 1.2653717911738396, + "kl": 0.23046875, + "learning_rate": 9.239137819489047e-07, + "loss": 0.0023404674138873816, + "memory(GiB)": 147.2, + "reward": 1.7768093347549438, + "reward_std": 0.20981763303279877, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5229452848434448, + "rewards/EvidenceHallucination/std": 0.42321765422821045, + "rewards/Evidence_Num_Record/mean": 5.238095283508301, + "rewards/Evidence_Num_Record/std": 1.6646909713745117, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.5817440152168274, + "rewards/VideoAccuracy/std": 0.3769208788871765, + "step": 529, + "train_speed(iter/s)": 0.017545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 789.0, + "completions/mean_length": 527.6428833007812, + "completions/min_length": 379.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.396484375, + "entropy/min": 0.287109375, + "epoch": 0.53, + "grad_norm": 1.0116197589195777, + "kl": 0.251953125, + "learning_rate": 9.207500431432113e-07, + "loss": 0.002533841645345092, + "memory(GiB)": 147.2, + "reward": 1.3694162368774414, + "reward_std": 0.17520248889923096, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16798247396945953, + "rewards/EvidenceHallucination/std": 0.3317740559577942, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.3460484743118286, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.2739149332046509, + "rewards/VideoAccuracy/std": 0.46860837936401367, + "step": 530, + "train_speed(iter/s)": 0.017547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 792.0, + "completions/mean_length": 531.8095092773438, + "completions/min_length": 383.0, + "entropy/max": 0.3671875, + "entropy/mean": 0.263671875, + "entropy/min": 0.10693359375, + "epoch": 0.531, + "grad_norm": 1.0389044122120064, + "kl": 0.21875, + "learning_rate": 9.17587102383704e-07, + "loss": 0.002218902111053467, + "memory(GiB)": 147.2, + "reward": 2.226511001586914, + "reward_std": 0.09947007894515991, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5426416993141174, + "rewards/EvidenceHallucination/std": 0.4090539813041687, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 0.4068232774734497, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9179825782775879, + "rewards/VideoAccuracy/std": 0.4307158589363098, + "step": 531, + "train_speed(iter/s)": 0.017547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1128.0, + "completions/mean_length": 521.6428833007812, + "completions/min_length": 342.0, + "entropy/max": 0.94140625, + "entropy/mean": 0.396484375, + "entropy/min": 0.173828125, + "epoch": 0.532, + "grad_norm": 1.23100170161689, + "kl": 0.2490234375, + "learning_rate": 9.144249915211605e-07, + "loss": 0.002525723772123456, + "memory(GiB)": 147.2, + "reward": 1.67011559009552, + "reward_std": 0.3325250446796417, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4056369364261627, + "rewards/EvidenceHallucination/std": 0.41158750653266907, + "rewards/Evidence_Num_Record/mean": 4.833333492279053, + "rewards/Evidence_Num_Record/std": 1.98674476146698, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5223214626312256, + "rewards/VideoAccuracy/std": 0.44740840792655945, + "step": 532, + "train_speed(iter/s)": 0.017547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1259.0, + "completions/mean_length": 576.547607421875, + "completions/min_length": 433.0, + "entropy/max": 0.828125, + "entropy/mean": 0.365234375, + "entropy/min": 0.162109375, + "epoch": 0.533, + "grad_norm": 1.1137092359902618, + "kl": 0.2294921875, + "learning_rate": 9.11263742398002e-07, + "loss": 0.002338199643418193, + "memory(GiB)": 147.2, + "reward": 1.3284426927566528, + "reward_std": 0.2385426014661789, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13656195998191833, + "rewards/EvidenceHallucination/std": 0.3124164640903473, + "rewards/Evidence_Num_Record/mean": 5.333333492279053, + "rewards/Evidence_Num_Record/std": 2.204946517944336, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.2392255663871765, + "rewards/VideoAccuracy/std": 0.34489506483078003, + "step": 533, + "train_speed(iter/s)": 0.017546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 637.0, + "completions/mean_length": 488.16668701171875, + "completions/min_length": 305.0, + "entropy/max": 0.41796875, + "entropy/mean": 0.28125, + "entropy/min": 0.18359375, + "epoch": 0.534, + "grad_norm": 1.1890010499188732, + "kl": 0.263671875, + "learning_rate": 9.081033868479726e-07, + "loss": 0.002650885609909892, + "memory(GiB)": 147.2, + "reward": 2.124683141708374, + "reward_std": 0.21604083478450775, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5279164910316467, + "rewards/EvidenceHallucination/std": 0.4119936227798462, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.8323455452919006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.8571950793266296, + "rewards/VideoAccuracy/std": 0.5043355822563171, + "step": 534, + "train_speed(iter/s)": 0.017549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 662.0, + "completions/mean_length": 451.3809509277344, + "completions/min_length": 283.0, + "entropy/max": 0.91015625, + "entropy/mean": 0.40234375, + "entropy/min": 0.177734375, + "epoch": 0.535, + "grad_norm": 1.1734850505828, + "kl": 0.255859375, + "learning_rate": 9.049439566958175e-07, + "loss": 0.0025756550021469593, + "memory(GiB)": 147.2, + "reward": 1.7241538763046265, + "reward_std": 0.05795424431562424, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3884280025959015, + "rewards/EvidenceHallucination/std": 0.4080182909965515, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 1.0809296369552612, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5512299537658691, + "rewards/VideoAccuracy/std": 0.3939768075942993, + "step": 535, + "train_speed(iter/s)": 0.017561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 719.0, + "completions/mean_length": 475.16668701171875, + "completions/min_length": 270.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.3515625, + "entropy/min": 0.2001953125, + "epoch": 0.536, + "grad_norm": 1.3036157570263955, + "kl": 0.25390625, + "learning_rate": 9.017854837569628e-07, + "loss": 0.002558166394010186, + "memory(GiB)": 147.2, + "reward": 1.823333978652954, + "reward_std": 0.2030932903289795, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5309664011001587, + "rewards/EvidenceHallucination/std": 0.4735872149467468, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.9093654155731201, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6171407103538513, + "rewards/VideoAccuracy/std": 0.4215722680091858, + "step": 536, + "train_speed(iter/s)": 0.017565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 761.0, + "completions/mean_length": 501.0, + "completions/min_length": 324.0, + "entropy/max": 0.73046875, + "entropy/mean": 0.36328125, + "entropy/min": 0.205078125, + "epoch": 0.537, + "grad_norm": 1.1426002973461102, + "kl": 0.255859375, + "learning_rate": 8.986279998371967e-07, + "loss": 0.0025861049070954323, + "memory(GiB)": 147.2, + "reward": 1.8708044290542603, + "reward_std": 0.1367248147726059, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4680973291397095, + "rewards/EvidenceHallucination/std": 0.4764857888221741, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.214507818222046, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6771848201751709, + "rewards/VideoAccuracy/std": 0.5465537309646606, + "step": 537, + "train_speed(iter/s)": 0.017568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1004.0, + "completions/mean_length": 559.357177734375, + "completions/min_length": 355.0, + "entropy/max": 0.8515625, + "entropy/mean": 0.375, + "entropy/min": 0.119140625, + "epoch": 0.538, + "grad_norm": 0.8859204160097884, + "kl": 0.2109375, + "learning_rate": 8.954715367323466e-07, + "loss": 0.002143596298992634, + "memory(GiB)": 147.2, + "reward": 1.7805614471435547, + "reward_std": 0.1892300844192505, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16119354963302612, + "rewards/EvidenceHallucination/std": 0.3293311595916748, + "rewards/Evidence_Num_Record/mean": 4.833333492279053, + "rewards/Evidence_Num_Record/std": 1.9116672277450562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6197511553764343, + "rewards/VideoAccuracy/std": 0.45194223523139954, + "step": 538, + "train_speed(iter/s)": 0.017545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/mean_length": 518.5952758789062, + "completions/min_length": 267.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.369140625, + "entropy/min": 0.2060546875, + "epoch": 0.539, + "grad_norm": 1.2069866276978913, + "kl": 0.2451171875, + "learning_rate": 8.923161262279611e-07, + "loss": 0.0024995713029056787, + "memory(GiB)": 147.2, + "reward": 1.718224287033081, + "reward_std": 0.14783141016960144, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43049150705337524, + "rewards/EvidenceHallucination/std": 0.4441774785518646, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.0581248998641968, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5368879437446594, + "rewards/VideoAccuracy/std": 0.394218385219574, + "step": 539, + "train_speed(iter/s)": 0.017549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1152.0, + "completions/mean_length": 514.2142944335938, + "completions/min_length": 371.0, + "entropy/max": 0.765625, + "entropy/mean": 0.359375, + "entropy/min": 0.173828125, + "epoch": 0.54, + "grad_norm": 1.15138653953421, + "kl": 0.255859375, + "learning_rate": 8.89161800098989e-07, + "loss": 0.002595373895019293, + "memory(GiB)": 147.2, + "reward": 1.496476650238037, + "reward_std": 0.1784413456916809, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2719321548938751, + "rewards/EvidenceHallucination/std": 0.39456066489219666, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.9624820947647095, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.38018539547920227, + "rewards/VideoAccuracy/std": 0.45438462495803833, + "step": 540, + "train_speed(iter/s)": 0.017548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/mean_length": 475.5, + "completions/min_length": 332.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.2890625, + "entropy/min": 0.11279296875, + "epoch": 0.541, + "grad_norm": 1.0596125947616801, + "kl": 0.2392578125, + "learning_rate": 8.860085901094594e-07, + "loss": 0.0024353486951440573, + "memory(GiB)": 147.2, + "reward": 2.333366870880127, + "reward_std": 0.11621489375829697, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3714437484741211, + "rewards/EvidenceHallucination/std": 0.44054171442985535, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.8540400862693787, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0590778589248657, + "rewards/VideoAccuracy/std": 0.42696964740753174, + "step": 541, + "train_speed(iter/s)": 0.01755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 957.0, + "completions/mean_length": 497.452392578125, + "completions/min_length": 246.0, + "entropy/max": 0.79296875, + "entropy/mean": 0.38671875, + "entropy/min": 0.212890625, + "epoch": 0.542, + "grad_norm": 0.8243397697316406, + "kl": 0.259765625, + "learning_rate": 8.828565280121617e-07, + "loss": 0.0026443253736943007, + "memory(GiB)": 147.2, + "reward": 1.4261549711227417, + "reward_std": 0.12304135411977768, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.208102747797966, + "rewards/EvidenceHallucination/std": 0.3766601085662842, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.5311325788497925, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.3178676962852478, + "rewards/VideoAccuracy/std": 0.4038638174533844, + "step": 542, + "train_speed(iter/s)": 0.017558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1040.0, + "completions/mean_length": 543.857177734375, + "completions/min_length": 364.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.3828125, + "entropy/min": 0.26171875, + "epoch": 0.543, + "grad_norm": 1.1700946200322362, + "kl": 0.23828125, + "learning_rate": 8.797056455483266e-07, + "loss": 0.002405008068308234, + "memory(GiB)": 147.2, + "reward": 1.1764250993728638, + "reward_std": 0.22184321284294128, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.09296326339244843, + "rewards/EvidenceHallucination/std": 0.2726643681526184, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.2280595302581787, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.095238097012043, + "rewards/HonestTime/std": 0.297101765871048, + "rewards/VideoAccuracy/mean": 0.13878484070301056, + "rewards/VideoAccuracy/std": 0.26740172505378723, + "step": 543, + "train_speed(iter/s)": 0.017556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 780.0, + "completions/mean_length": 481.4761962890625, + "completions/min_length": 293.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.337890625, + "entropy/min": 0.12353515625, + "epoch": 0.544, + "grad_norm": 1.0793033530846348, + "kl": 0.279296875, + "learning_rate": 8.765559744473053e-07, + "loss": 0.003017617389559746, + "memory(GiB)": 147.2, + "reward": 1.6041492223739624, + "reward_std": 0.24795274436473846, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1896679550409317, + "rewards/EvidenceHallucination/std": 0.3711082339286804, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.0682299137115479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.4043109118938446, + "rewards/VideoAccuracy/std": 0.47207334637641907, + "step": 544, + "train_speed(iter/s)": 0.01756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1575.0, + "completions/mean_length": 510.16668701171875, + "completions/min_length": 237.0, + "entropy/max": 0.984375, + "entropy/mean": 0.333984375, + "entropy/min": 0.1376953125, + "epoch": 0.545, + "grad_norm": 1.056642920217122, + "kl": 0.2490234375, + "learning_rate": 8.734075464262506e-07, + "loss": 0.0025731062050908804, + "memory(GiB)": 147.2, + "reward": 2.0171689987182617, + "reward_std": 0.056373246014118195, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4805810749530792, + "rewards/EvidenceHallucination/std": 0.3956851661205292, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 2.6864826679229736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8210528492927551, + "rewards/VideoAccuracy/std": 0.41951730847358704, + "step": 545, + "train_speed(iter/s)": 0.017557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1076.0, + "completions/mean_length": 516.0714111328125, + "completions/min_length": 293.0, + "entropy/max": 0.828125, + "entropy/mean": 0.353515625, + "entropy/min": 0.11669921875, + "epoch": 0.546, + "grad_norm": 1.0850137151077015, + "kl": 0.2451171875, + "learning_rate": 8.702603931897981e-07, + "loss": 0.002520657144486904, + "memory(GiB)": 147.2, + "reward": 1.4326279163360596, + "reward_std": 0.28107914328575134, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32118892669677734, + "rewards/EvidenceHallucination/std": 0.4417652189731598, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.9871830940246582, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.31600916385650635, + "rewards/VideoAccuracy/std": 0.4143351912498474, + "step": 546, + "train_speed(iter/s)": 0.017552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1199.0, + "completions/mean_length": 524.7619018554688, + "completions/min_length": 341.0, + "entropy/max": 0.578125, + "entropy/mean": 0.337890625, + "entropy/min": 0.21484375, + "epoch": 0.547, + "grad_norm": 0.9859721647461079, + "kl": 0.267578125, + "learning_rate": 8.671145464297459e-07, + "loss": 0.0027126925997436047, + "memory(GiB)": 147.2, + "reward": 1.842204213142395, + "reward_std": 0.20456859469413757, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4958741366863251, + "rewards/EvidenceHallucination/std": 0.45373860001564026, + "rewards/Evidence_Num_Record/mean": 5.047619342803955, + "rewards/Evidence_Num_Record/std": 3.863224744796753, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6430291533470154, + "rewards/VideoAccuracy/std": 0.5219913721084595, + "step": 547, + "train_speed(iter/s)": 0.01755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1603.0, + "completions/mean_length": 568.952392578125, + "completions/min_length": 288.0, + "entropy/max": 2.515625, + "entropy/mean": 0.376953125, + "entropy/min": 0.12353515625, + "epoch": 0.548, + "grad_norm": 0.869066911510286, + "kl": 0.2109375, + "learning_rate": 8.63970037824736e-07, + "loss": 0.002374354749917984, + "memory(GiB)": 147.2, + "reward": 1.8578296899795532, + "reward_std": 0.1573992371559143, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35328730940818787, + "rewards/EvidenceHallucination/std": 0.4390169084072113, + "rewards/Evidence_Num_Record/mean": 5.095238208770752, + "rewards/Evidence_Num_Record/std": 3.4486443996429443, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6538389325141907, + "rewards/VideoAccuracy/std": 0.44841691851615906, + "step": 548, + "train_speed(iter/s)": 0.017548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1302.0, + "completions/mean_length": 544.2857055664062, + "completions/min_length": 341.0, + "entropy/max": 1.65625, + "entropy/mean": 0.439453125, + "entropy/min": 0.158203125, + "epoch": 0.549, + "grad_norm": 1.2103727378080085, + "kl": 0.251953125, + "learning_rate": 8.608268990399348e-07, + "loss": 0.0025931699201464653, + "memory(GiB)": 147.2, + "reward": 1.9039450883865356, + "reward_std": 0.19438162446022034, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6002238392829895, + "rewards/EvidenceHallucination/std": 0.36550626158714294, + "rewards/Evidence_Num_Record/mean": 5.0714287757873535, + "rewards/Evidence_Num_Record/std": 2.588839530944824, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6839002966880798, + "rewards/VideoAccuracy/std": 0.3481704890727997, + "step": 549, + "train_speed(iter/s)": 0.017541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1041.0, + "completions/mean_length": 562.0952758789062, + "completions/min_length": 294.0, + "entropy/max": 0.625, + "entropy/mean": 0.365234375, + "entropy/min": 0.20703125, + "epoch": 0.55, + "grad_norm": 1.231787019614606, + "kl": 0.23046875, + "learning_rate": 8.576851617267149e-07, + "loss": 0.002344725653529167, + "memory(GiB)": 147.2, + "reward": 1.6168367862701416, + "reward_std": 0.34122419357299805, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4344601631164551, + "rewards/EvidenceHallucination/std": 0.4488111436367035, + "rewards/Evidence_Num_Record/mean": 5.333333492279053, + "rewards/Evidence_Num_Record/std": 1.9084748029708862, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5013732314109802, + "rewards/VideoAccuracy/std": 0.5075318217277527, + "step": 550, + "train_speed(iter/s)": 0.01754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1269.0, + "completions/mean_length": 571.3095092773438, + "completions/min_length": 337.0, + "entropy/max": 0.470703125, + "entropy/mean": 0.265625, + "entropy/min": 0.1435546875, + "epoch": 0.551, + "grad_norm": 1.0605074789592959, + "kl": 0.21875, + "learning_rate": 8.545448575223368e-07, + "loss": 0.002243774477392435, + "memory(GiB)": 147.2, + "reward": 2.0291800498962402, + "reward_std": 0.2675192952156067, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45944949984550476, + "rewards/EvidenceHallucination/std": 0.4359951615333557, + "rewards/Evidence_Num_Record/mean": 5.023809432983398, + "rewards/Evidence_Num_Record/std": 2.727350950241089, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430335700511932, + "rewards/VideoAccuracy/mean": 0.7420520186424255, + "rewards/VideoAccuracy/std": 0.48766934871673584, + "step": 551, + "train_speed(iter/s)": 0.017536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/mean_length": 530.7142944335938, + "completions/min_length": 252.0, + "entropy/max": 1.96875, + "entropy/mean": 0.42578125, + "entropy/min": 0.2216796875, + "epoch": 0.552, + "grad_norm": 1.008962727340414, + "kl": 0.2470703125, + "learning_rate": 8.514060180496284e-07, + "loss": 0.002557076746597886, + "memory(GiB)": 147.2, + "reward": 1.6508671045303345, + "reward_std": 0.08526341617107391, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34803465008735657, + "rewards/EvidenceHallucination/std": 0.3888440430164337, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 2.1068992614746094, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5193555355072021, + "rewards/VideoAccuracy/std": 0.46005895733833313, + "step": 552, + "train_speed(iter/s)": 0.017541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 995.0, + "completions/mean_length": 500.0238037109375, + "completions/min_length": 344.0, + "entropy/max": 0.95703125, + "entropy/mean": 0.421875, + "entropy/min": 0.259765625, + "epoch": 0.553, + "grad_norm": 0.842227210358372, + "kl": 0.2412109375, + "learning_rate": 8.482686749166684e-07, + "loss": 0.0024455091916024685, + "memory(GiB)": 147.2, + "reward": 1.3672181367874146, + "reward_std": 0.14556831121444702, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23915958404541016, + "rewards/EvidenceHallucination/std": 0.4084899425506592, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.3733822107315063, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.29081475734710693, + "rewards/VideoAccuracy/std": 0.4380667805671692, + "step": 553, + "train_speed(iter/s)": 0.017541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 613.0, + "completions/mean_length": 448.0714416503906, + "completions/min_length": 311.0, + "entropy/max": 0.447265625, + "entropy/mean": 0.298828125, + "entropy/min": 0.158203125, + "epoch": 0.554, + "grad_norm": 1.087517160201009, + "kl": 0.267578125, + "learning_rate": 8.451328597164677e-07, + "loss": 0.002687928732484579, + "memory(GiB)": 147.2, + "reward": 2.3258044719696045, + "reward_std": 0.27705249190330505, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5598920583724976, + "rewards/EvidenceHallucination/std": 0.3996395468711853, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.9877296090126038, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 1.0519213676452637, + "rewards/VideoAccuracy/std": 0.659984290599823, + "step": 554, + "train_speed(iter/s)": 0.017541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 997.0, + "completions/mean_length": 534.3333129882812, + "completions/min_length": 327.0, + "entropy/max": 1.015625, + "entropy/mean": 0.34375, + "entropy/min": 0.13671875, + "epoch": 0.555, + "grad_norm": 1.1474073643300726, + "kl": 0.2158203125, + "learning_rate": 8.4199860402665e-07, + "loss": 0.0022617948707193136, + "memory(GiB)": 147.2, + "reward": 2.108491897583008, + "reward_std": 0.09929686784744263, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6211246848106384, + "rewards/EvidenceHallucination/std": 0.3694995939731598, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.591936707496643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8842668533325195, + "rewards/VideoAccuracy/std": 0.2836246192455292, + "step": 555, + "train_speed(iter/s)": 0.017544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 811.0, + "completions/mean_length": 513.9761962890625, + "completions/min_length": 389.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.341796875, + "entropy/min": 0.2021484375, + "epoch": 0.556, + "grad_norm": 1.0451420644515945, + "kl": 0.23828125, + "learning_rate": 8.38865939409136e-07, + "loss": 0.0024297989439219236, + "memory(GiB)": 147.2, + "reward": 1.6017160415649414, + "reward_std": 0.12775146961212158, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3997710645198822, + "rewards/EvidenceHallucination/std": 0.4567578434944153, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.6100728511810303, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.46938085556030273, + "rewards/VideoAccuracy/std": 0.4314016103744507, + "step": 556, + "train_speed(iter/s)": 0.017521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 688.0, + "completions/mean_length": 478.952392578125, + "completions/min_length": 311.0, + "entropy/max": 0.83203125, + "entropy/mean": 0.3671875, + "entropy/min": 0.220703125, + "epoch": 0.557, + "grad_norm": 1.2076152837928047, + "kl": 0.25390625, + "learning_rate": 8.357348974098231e-07, + "loss": 0.0025751078501343727, + "memory(GiB)": 147.2, + "reward": 1.8883492946624756, + "reward_std": 0.2357943058013916, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45542046427726746, + "rewards/EvidenceHallucination/std": 0.46642157435417175, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.2733304500579834, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4047619104385376, + "rewards/HonestTime/std": 0.49679577350616455, + "rewards/VideoAccuracy/mean": 0.7163127660751343, + "rewards/VideoAccuracy/std": 0.5752731561660767, + "step": 557, + "train_speed(iter/s)": 0.017524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/mean_length": 505.8571472167969, + "completions/min_length": 305.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.283203125, + "entropy/min": 0.1416015625, + "epoch": 0.558, + "grad_norm": 1.2037434616808447, + "kl": 0.2314453125, + "learning_rate": 8.326055095582693e-07, + "loss": 0.0023501659743487835, + "memory(GiB)": 147.2, + "reward": 2.298994302749634, + "reward_std": 0.18438777327537537, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5537318587303162, + "rewards/EvidenceHallucination/std": 0.39360833168029785, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 0.8621610999107361, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 1.0644384622573853, + "rewards/VideoAccuracy/std": 0.3866475224494934, + "step": 558, + "train_speed(iter/s)": 0.017522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1127.0, + "completions/mean_length": 485.0952453613281, + "completions/min_length": 281.0, + "entropy/max": 0.609375, + "entropy/mean": 0.337890625, + "entropy/min": 0.1396484375, + "epoch": 0.559, + "grad_norm": 1.1129566054635687, + "kl": 0.2451171875, + "learning_rate": 8.294778073673761e-07, + "loss": 0.002496413653716445, + "memory(GiB)": 147.2, + "reward": 1.7798714637756348, + "reward_std": 0.1880035102367401, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5607653260231018, + "rewards/EvidenceHallucination/std": 0.3877881169319153, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.6562974452972412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5724801421165466, + "rewards/VideoAccuracy/std": 0.3908865749835968, + "step": 559, + "train_speed(iter/s)": 0.017521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 702.0, + "completions/mean_length": 447.73809814453125, + "completions/min_length": 306.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.36328125, + "entropy/min": 0.26171875, + "epoch": 0.56, + "grad_norm": 1.239147157526934, + "kl": 0.26953125, + "learning_rate": 8.263518223330696e-07, + "loss": 0.0027397829107940197, + "memory(GiB)": 147.2, + "reward": 1.7501899003982544, + "reward_std": 0.3330119252204895, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46977293491363525, + "rewards/EvidenceHallucination/std": 0.4220462441444397, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.9578818678855896, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6229020357131958, + "rewards/VideoAccuracy/std": 0.5011475086212158, + "step": 560, + "train_speed(iter/s)": 0.017519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 807.0, + "completions/mean_length": 441.4761962890625, + "completions/min_length": 301.0, + "entropy/max": 0.384765625, + "entropy/mean": 0.259765625, + "entropy/min": 0.1240234375, + "epoch": 0.561, + "grad_norm": 1.0567308055382663, + "kl": 0.2578125, + "learning_rate": 8.232275859339841e-07, + "loss": 0.002620976883918047, + "memory(GiB)": 147.2, + "reward": 2.082119941711426, + "reward_std": 0.10417261719703674, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3060951232910156, + "rewards/EvidenceHallucination/std": 0.4310819208621979, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 1.2261664867401123, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8209007978439331, + "rewards/VideoAccuracy/std": 0.5067620873451233, + "step": 561, + "train_speed(iter/s)": 0.017523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 703.0, + "completions/mean_length": 437.23809814453125, + "completions/min_length": 217.0, + "entropy/max": 2.328125, + "entropy/mean": 0.609375, + "entropy/min": 0.23828125, + "epoch": 0.562, + "grad_norm": 1.2524507605021182, + "kl": 0.267578125, + "learning_rate": 8.201051296311461e-07, + "loss": 0.0027312892489135265, + "memory(GiB)": 147.2, + "reward": 1.9048120975494385, + "reward_std": 0.0746854692697525, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6275681257247925, + "rewards/EvidenceHallucination/std": 0.39228275418281555, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.3050869703292847, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108052015304565, + "rewards/VideoAccuracy/mean": 0.7316792011260986, + "rewards/VideoAccuracy/std": 0.38063371181488037, + "step": 562, + "train_speed(iter/s)": 0.017531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.023809523809523808, + "completions/max_length": 2625.0, + "completions/mean_length": 519.1904907226562, + "completions/min_length": 308.0, + "entropy/max": 0.8046875, + "entropy/mean": 0.392578125, + "entropy/min": 0.115234375, + "epoch": 0.563, + "grad_norm": 0.7578925296379078, + "kl": 0.2373046875, + "learning_rate": 8.169844848676552e-07, + "loss": 0.0024781660176813602, + "memory(GiB)": 147.2, + "reward": 1.2615805864334106, + "reward_std": 0.09937258064746857, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.17860430479049683, + "rewards/EvidenceHallucination/std": 0.37374719977378845, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 4.929938793182373, + "rewards/Format/mean": 0.9761905074119568, + "rewards/Format/std": 0.15430334210395813, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.20919305086135864, + "rewards/VideoAccuracy/std": 0.366926908493042, + "step": 563, + "train_speed(iter/s)": 0.017504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/mean_length": 447.4285888671875, + "completions/min_length": 303.0, + "entropy/max": 0.53125, + "entropy/mean": 0.310546875, + "entropy/min": 0.19140625, + "epoch": 0.564, + "grad_norm": 1.0846174037438228, + "kl": 0.27734375, + "learning_rate": 8.138656830683699e-07, + "loss": 0.002800673944875598, + "memory(GiB)": 147.2, + "reward": 1.9805023670196533, + "reward_std": 0.16426292061805725, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4681243896484375, + "rewards/EvidenceHallucination/std": 0.47596821188926697, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 1.2195180654525757, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.7249726057052612, + "rewards/VideoAccuracy/std": 0.5356833338737488, + "step": 564, + "train_speed(iter/s)": 0.017512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 828.0, + "completions/mean_length": 538.2380981445312, + "completions/min_length": 323.0, + "entropy/max": 1.25, + "entropy/mean": 0.41015625, + "entropy/min": 0.1416015625, + "epoch": 0.565, + "grad_norm": 1.1956637226126732, + "kl": 0.2216796875, + "learning_rate": 8.107487556395901e-07, + "loss": 0.0022581107914447784, + "memory(GiB)": 147.2, + "reward": 1.9260531663894653, + "reward_std": 0.3004651665687561, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5718371868133545, + "rewards/EvidenceHallucination/std": 0.40739184617996216, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.0178431272506714, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.711685836315155, + "rewards/VideoAccuracy/std": 0.3880319893360138, + "step": 565, + "train_speed(iter/s)": 0.017513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 616.0, + "completions/mean_length": 433.73809814453125, + "completions/min_length": 310.0, + "entropy/max": 0.6953125, + "entropy/mean": 0.408203125, + "entropy/min": 0.25390625, + "epoch": 0.566, + "grad_norm": 1.172736829803333, + "kl": 0.259765625, + "learning_rate": 8.076337339687394e-07, + "loss": 0.00262894737534225, + "memory(GiB)": 147.2, + "reward": 1.4021013975143433, + "reward_std": 0.22173522412776947, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2598130404949188, + "rewards/EvidenceHallucination/std": 0.41785454750061035, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.2977578043937683, + "rewards/VideoAccuracy/std": 0.3597749173641205, + "step": 566, + "train_speed(iter/s)": 0.017516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 671.0, + "completions/mean_length": 426.1190490722656, + "completions/min_length": 299.0, + "entropy/max": 0.7734375, + "entropy/mean": 0.345703125, + "entropy/min": 0.193359375, + "epoch": 0.567, + "grad_norm": 1.0379878114000836, + "kl": 0.294921875, + "learning_rate": 8.045206494240519e-07, + "loss": 0.0031821592710912228, + "memory(GiB)": 147.2, + "reward": 1.4732636213302612, + "reward_std": 0.13507957756519318, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19460420310497284, + "rewards/EvidenceHallucination/std": 0.37920790910720825, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 1.315722942352295, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.33434268832206726, + "rewards/VideoAccuracy/std": 0.5319502949714661, + "step": 567, + "train_speed(iter/s)": 0.017525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/mean_length": 573.2857055664062, + "completions/min_length": 375.0, + "entropy/max": 1.3828125, + "entropy/mean": 0.306640625, + "entropy/min": 0.11572265625, + "epoch": 0.568, + "grad_norm": 0.8919847704200217, + "kl": 0.21484375, + "learning_rate": 8.014095333542547e-07, + "loss": 0.0021857237443327904, + "memory(GiB)": 147.2, + "reward": 1.85391366481781, + "reward_std": 0.18633460998535156, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3625771403312683, + "rewards/EvidenceHallucination/std": 0.4368312656879425, + "rewards/Evidence_Num_Record/mean": 5.0714287757873535, + "rewards/Evidence_Num_Record/std": 2.2781341075897217, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5952380895614624, + "rewards/HonestTime/std": 0.49679574370384216, + "rewards/VideoAccuracy/mean": 0.6623504161834717, + "rewards/VideoAccuracy/std": 0.6112027168273926, + "step": 568, + "train_speed(iter/s)": 0.017524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1031.0, + "completions/mean_length": 495.3571472167969, + "completions/min_length": 301.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.40234375, + "entropy/min": 0.212890625, + "epoch": 0.569, + "grad_norm": 1.210078635891529, + "kl": 0.23828125, + "learning_rate": 7.983004170882517e-07, + "loss": 0.002446995582431555, + "memory(GiB)": 147.2, + "reward": 1.491422176361084, + "reward_std": 0.3153664767742157, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28596773743629456, + "rewards/EvidenceHallucination/std": 0.39698514342308044, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 2.1299262046813965, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.3485143184661865, + "rewards/VideoAccuracy/std": 0.3958366811275482, + "step": 569, + "train_speed(iter/s)": 0.017519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1365.0, + "completions/mean_length": 507.452392578125, + "completions/min_length": 335.0, + "entropy/max": 0.640625, + "entropy/mean": 0.384765625, + "entropy/min": 0.16015625, + "epoch": 0.57, + "grad_norm": 1.2012402312166044, + "kl": 0.255859375, + "learning_rate": 7.951933319348095e-07, + "loss": 0.0026038773357868195, + "memory(GiB)": 147.2, + "reward": 1.5208826065063477, + "reward_std": 0.28261804580688477, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3382541537284851, + "rewards/EvidenceHallucination/std": 0.4244920015335083, + "rewards/Evidence_Num_Record/mean": 5.047619342803955, + "rewards/Evidence_Num_Record/std": 3.3998019695281982, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.41989845037460327, + "rewards/VideoAccuracy/std": 0.5120235085487366, + "step": 570, + "train_speed(iter/s)": 0.017519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 754.0, + "completions/mean_length": 476.5476379394531, + "completions/min_length": 322.0, + "entropy/max": 0.482421875, + "entropy/mean": 0.26171875, + "entropy/min": 0.09326171875, + "epoch": 0.571, + "grad_norm": 1.083601154480782, + "kl": 0.2421875, + "learning_rate": 7.920883091822408e-07, + "loss": 0.002464497461915016, + "memory(GiB)": 147.2, + "reward": 1.979665756225586, + "reward_std": 0.27502089738845825, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24763883650302887, + "rewards/EvidenceHallucination/std": 0.3845669627189636, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 1.0348178148269653, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9285714626312256, + "rewards/HonestTime/std": 0.26066118478775024, + "rewards/VideoAccuracy/mean": 0.7444235682487488, + "rewards/VideoAccuracy/std": 0.5135056972503662, + "step": 571, + "train_speed(iter/s)": 0.017523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1556.0, + "completions/mean_length": 513.1190795898438, + "completions/min_length": 332.0, + "entropy/max": 1.0390625, + "entropy/mean": 0.40625, + "entropy/min": 0.1865234375, + "epoch": 0.572, + "grad_norm": 1.3195737662730367, + "kl": 0.24609375, + "learning_rate": 7.889853800980903e-07, + "loss": 0.00253634387627244, + "memory(GiB)": 147.2, + "reward": 1.7044157981872559, + "reward_std": 0.27610307931900024, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.436484158039093, + "rewards/EvidenceHallucination/std": 0.42803844809532166, + "rewards/Evidence_Num_Record/mean": 5.523809432983398, + "rewards/Evidence_Num_Record/std": 3.240281105041504, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5552142262458801, + "rewards/VideoAccuracy/std": 0.4598389267921448, + "step": 572, + "train_speed(iter/s)": 0.017519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1473.0, + "completions/mean_length": 519.8809814453125, + "completions/min_length": 308.0, + "entropy/max": 0.59375, + "entropy/mean": 0.369140625, + "entropy/min": 0.150390625, + "epoch": 0.573, + "grad_norm": 1.2196867198222467, + "kl": 0.2421875, + "learning_rate": 7.858845759288197e-07, + "loss": 0.0024828468449413776, + "memory(GiB)": 147.2, + "reward": 1.397599697113037, + "reward_std": 0.20538488030433655, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28373873233795166, + "rewards/EvidenceHallucination/std": 0.40919020771980286, + "rewards/Evidence_Num_Record/mean": 5.38095235824585, + "rewards/Evidence_Num_Record/std": 3.4987967014312744, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.3170424997806549, + "rewards/VideoAccuracy/std": 0.4093017280101776, + "step": 573, + "train_speed(iter/s)": 0.017515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 459.90478515625, + "completions/min_length": 307.0, + "entropy/max": 0.4765625, + "entropy/mean": 0.2890625, + "entropy/min": 0.169921875, + "epoch": 0.574, + "grad_norm": 1.2462310660679652, + "kl": 0.259765625, + "learning_rate": 7.827859278994924e-07, + "loss": 0.002635692711919546, + "memory(GiB)": 147.2, + "reward": 2.3556113243103027, + "reward_std": 0.18688201904296875, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6333656311035156, + "rewards/EvidenceHallucination/std": 0.39550983905792236, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.3582885265350342, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 1.067033290863037, + "rewards/VideoAccuracy/std": 0.3786657154560089, + "step": 574, + "train_speed(iter/s)": 0.017512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/mean_length": 519.6666870117188, + "completions/min_length": 345.0, + "entropy/max": 1.546875, + "entropy/mean": 0.3828125, + "entropy/min": 0.1376953125, + "epoch": 0.575, + "grad_norm": 1.1636934037261435, + "kl": 0.2255859375, + "learning_rate": 7.796894672134593e-07, + "loss": 0.0023174858652055264, + "memory(GiB)": 147.2, + "reward": 1.8038508892059326, + "reward_std": 0.2878847122192383, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4917738735675812, + "rewards/EvidenceHallucination/std": 0.4435805678367615, + "rewards/Evidence_Num_Record/mean": 5.095238208770752, + "rewards/Evidence_Num_Record/std": 2.6208314895629883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.6150199174880981, + "rewards/VideoAccuracy/std": 0.4046759307384491, + "step": 575, + "train_speed(iter/s)": 0.017518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 874.0, + "completions/mean_length": 485.8333435058594, + "completions/min_length": 345.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.37109375, + "entropy/min": 0.1767578125, + "epoch": 0.576, + "grad_norm": 1.1660534427614697, + "kl": 0.22265625, + "learning_rate": 7.765952250520458e-07, + "loss": 0.0022863138001412153, + "memory(GiB)": 147.2, + "reward": 1.7745193243026733, + "reward_std": 0.2418813556432724, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4411916732788086, + "rewards/EvidenceHallucination/std": 0.4348190426826477, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.9134889841079712, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.6005666255950928, + "rewards/VideoAccuracy/std": 0.38164791464805603, + "step": 576, + "train_speed(iter/s)": 0.017513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/mean_length": 528.40478515625, + "completions/min_length": 303.0, + "entropy/max": 0.625, + "entropy/mean": 0.337890625, + "entropy/min": 0.13671875, + "epoch": 0.577, + "grad_norm": 1.2322061073052224, + "kl": 0.255859375, + "learning_rate": 7.735032325742355e-07, + "loss": 0.0026552907656878233, + "memory(GiB)": 147.2, + "reward": 1.8820176124572754, + "reward_std": 0.17488104104995728, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5127783417701721, + "rewards/EvidenceHallucination/std": 0.42155125737190247, + "rewards/Evidence_Num_Record/mean": 5.309524059295654, + "rewards/Evidence_Num_Record/std": 3.1583268642425537, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6794617772102356, + "rewards/VideoAccuracy/std": 0.5325960516929626, + "step": 577, + "train_speed(iter/s)": 0.017511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1271.0, + "completions/mean_length": 612.1428833007812, + "completions/min_length": 353.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.287109375, + "entropy/min": 0.1103515625, + "epoch": 0.578, + "grad_norm": 1.0005613736355368, + "kl": 0.2109375, + "learning_rate": 7.704135209163588e-07, + "loss": 0.0021892141085118055, + "memory(GiB)": 147.2, + "reward": 1.8001298904418945, + "reward_std": 0.3935781419277191, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3724662661552429, + "rewards/EvidenceHallucination/std": 0.42378610372543335, + "rewards/Evidence_Num_Record/mean": 5.238095283508301, + "rewards/Evidence_Num_Record/std": 2.535691738128662, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.597065269947052, + "rewards/VideoAccuracy/std": 0.5219387412071228, + "step": 578, + "train_speed(iter/s)": 0.017508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1115.0, + "completions/mean_length": 473.3809509277344, + "completions/min_length": 320.0, + "entropy/max": 0.62109375, + "entropy/mean": 0.369140625, + "entropy/min": 0.1962890625, + "epoch": 0.579, + "grad_norm": 1.2303466431057068, + "kl": 0.2470703125, + "learning_rate": 7.673261211917775e-07, + "loss": 0.0025184685364365578, + "memory(GiB)": 147.2, + "reward": 1.8147884607315063, + "reward_std": 0.2195645570755005, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5988144278526306, + "rewards/EvidenceHallucination/std": 0.42997074127197266, + "rewards/Evidence_Num_Record/mean": 4.833333492279053, + "rewards/Evidence_Num_Record/std": 2.4881834983825684, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.6045495271682739, + "rewards/VideoAccuracy/std": 0.4016510546207428, + "step": 579, + "train_speed(iter/s)": 0.017508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1126.0, + "completions/mean_length": 472.8571472167969, + "completions/min_length": 324.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.375, + "entropy/min": 0.173828125, + "epoch": 0.58, + "grad_norm": 1.3407291707619684, + "kl": 0.255859375, + "learning_rate": 7.642410644905726e-07, + "loss": 0.0025993494782596827, + "memory(GiB)": 147.2, + "reward": 1.6960052251815796, + "reward_std": 0.3615559935569763, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44124239683151245, + "rewards/EvidenceHallucination/std": 0.4154648780822754, + "rewards/Evidence_Num_Record/mean": 4.785714149475098, + "rewards/Evidence_Num_Record/std": 2.3429782390594482, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.54585200548172, + "rewards/VideoAccuracy/std": 0.43818435072898865, + "step": 580, + "train_speed(iter/s)": 0.017505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 765.0, + "completions/mean_length": 453.1190490722656, + "completions/min_length": 324.0, + "entropy/max": 0.48046875, + "entropy/mean": 0.255859375, + "entropy/min": 0.125, + "epoch": 0.581, + "grad_norm": 1.0561630407134095, + "kl": 0.259765625, + "learning_rate": 7.61158381879231e-07, + "loss": 0.0026355660520493984, + "memory(GiB)": 147.2, + "reward": 2.223803758621216, + "reward_std": 0.10234322398900986, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46720772981643677, + "rewards/EvidenceHallucination/std": 0.4397595524787903, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.7054623365402222, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9303621649742126, + "rewards/VideoAccuracy/std": 0.45081934332847595, + "step": 581, + "train_speed(iter/s)": 0.017508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 874.0, + "completions/mean_length": 523.7380981445312, + "completions/min_length": 324.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.38671875, + "entropy/min": 0.23828125, + "epoch": 0.582, + "grad_norm": 1.0339141819625919, + "kl": 0.251953125, + "learning_rate": 7.580781044003324e-07, + "loss": 0.0025591151788830757, + "memory(GiB)": 147.2, + "reward": 1.577696681022644, + "reward_std": 0.1125200018286705, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32422274351119995, + "rewards/EvidenceHallucination/std": 0.41044461727142334, + "rewards/Evidence_Num_Record/mean": 5.357142925262451, + "rewards/Evidence_Num_Record/std": 2.034116744995117, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.446185439825058, + "rewards/VideoAccuracy/std": 0.45348039269447327, + "step": 582, + "train_speed(iter/s)": 0.01751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1058.0, + "completions/mean_length": 488.5714416503906, + "completions/min_length": 312.0, + "entropy/max": 0.91796875, + "entropy/mean": 0.435546875, + "entropy/min": 0.2890625, + "epoch": 0.583, + "grad_norm": 1.282970983136212, + "kl": 0.263671875, + "learning_rate": 7.550002630722365e-07, + "loss": 0.002655723597854376, + "memory(GiB)": 147.2, + "reward": 1.706022024154663, + "reward_std": 0.245122492313385, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3257083594799042, + "rewards/EvidenceHallucination/std": 0.4306149482727051, + "rewards/Evidence_Num_Record/mean": 4.928571701049805, + "rewards/Evidence_Num_Record/std": 2.0047850608825684, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5742137432098389, + "rewards/VideoAccuracy/std": 0.38398995995521545, + "step": 583, + "train_speed(iter/s)": 0.017507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 717.0, + "completions/mean_length": 452.21429443359375, + "completions/min_length": 300.0, + "entropy/max": 0.53125, + "entropy/mean": 0.333984375, + "entropy/min": 0.16796875, + "epoch": 0.584, + "grad_norm": 1.1753810847670432, + "kl": 0.26953125, + "learning_rate": 7.519248888887715e-07, + "loss": 0.0027270063292235136, + "memory(GiB)": 147.2, + "reward": 2.1703648567199707, + "reward_std": 0.13654127717018127, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44237229228019714, + "rewards/EvidenceHallucination/std": 0.4528614282608032, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.2230842113494873, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9152234792709351, + "rewards/VideoAccuracy/std": 0.557100772857666, + "step": 584, + "train_speed(iter/s)": 0.017513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1760.0, + "completions/mean_length": 486.8809509277344, + "completions/min_length": 280.0, + "entropy/max": 1.4765625, + "entropy/mean": 0.388671875, + "entropy/min": 0.1376953125, + "epoch": 0.585, + "grad_norm": 1.1676599210226963, + "kl": 0.2451171875, + "learning_rate": 7.488520128189208e-07, + "loss": 0.00256509892642498, + "memory(GiB)": 147.2, + "reward": 1.6237132549285889, + "reward_std": 0.21546559035778046, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14745573699474335, + "rewards/EvidenceHallucination/std": 0.31591618061065674, + "rewards/Evidence_Num_Record/mean": 4.952381134033203, + "rewards/Evidence_Num_Record/std": 3.6018450260162354, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.49898406863212585, + "rewards/VideoAccuracy/std": 0.48285984992980957, + "step": 585, + "train_speed(iter/s)": 0.01751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 577.0, + "completions/mean_length": 446.69049072265625, + "completions/min_length": 331.0, + "entropy/max": 1.0, + "entropy/mean": 0.400390625, + "entropy/min": 0.25390625, + "epoch": 0.586, + "grad_norm": 1.1332462195699249, + "kl": 0.265625, + "learning_rate": 7.457816658065132e-07, + "loss": 0.002694082912057638, + "memory(GiB)": 147.2, + "reward": 1.4800655841827393, + "reward_std": 0.29056552052497864, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.256216824054718, + "rewards/EvidenceHallucination/std": 0.3800258934497833, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8499504923820496, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.32882219552993774, + "rewards/VideoAccuracy/std": 0.38793131709098816, + "step": 586, + "train_speed(iter/s)": 0.017512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 646.0, + "completions/mean_length": 456.16668701171875, + "completions/min_length": 346.0, + "entropy/max": 0.7265625, + "entropy/mean": 0.3828125, + "entropy/min": 0.2392578125, + "epoch": 0.587, + "grad_norm": 1.1743006768939335, + "kl": 0.283203125, + "learning_rate": 7.427138787699085e-07, + "loss": 0.0028411007951945066, + "memory(GiB)": 147.2, + "reward": 1.962119221687317, + "reward_std": 0.27418652176856995, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5741019248962402, + "rewards/EvidenceHallucination/std": 0.4399320185184479, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 0.7908447980880737, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.7520606517791748, + "rewards/VideoAccuracy/std": 0.5276792645454407, + "step": 587, + "train_speed(iter/s)": 0.017511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 949.0, + "completions/mean_length": 500.3571472167969, + "completions/min_length": 327.0, + "entropy/max": 1.4765625, + "entropy/mean": 0.345703125, + "entropy/min": 0.15625, + "epoch": 0.588, + "grad_norm": 0.9097728252200424, + "kl": 0.2294921875, + "learning_rate": 7.396486826016879e-07, + "loss": 0.002346982713788748, + "memory(GiB)": 147.2, + "reward": 1.7889320850372314, + "reward_std": 0.1560111939907074, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14849649369716644, + "rewards/EvidenceHallucination/std": 0.2909368872642517, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.9500142335891724, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.6306614279747009, + "rewards/VideoAccuracy/std": 0.48010721802711487, + "step": 588, + "train_speed(iter/s)": 0.017511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 920.0, + "completions/mean_length": 504.8571472167969, + "completions/min_length": 275.0, + "entropy/max": 0.6796875, + "entropy/mean": 0.396484375, + "entropy/min": 0.2275390625, + "epoch": 0.589, + "grad_norm": 1.3897835790727282, + "kl": 0.2578125, + "learning_rate": 7.365861081683433e-07, + "loss": 0.0026217829436063766, + "memory(GiB)": 147.2, + "reward": 1.7859326601028442, + "reward_std": 0.30997252464294434, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5363885164260864, + "rewards/EvidenceHallucination/std": 0.4576357901096344, + "rewards/Evidence_Num_Record/mean": 5.190476417541504, + "rewards/Evidence_Num_Record/std": 1.811051368713379, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.588178813457489, + "rewards/VideoAccuracy/std": 0.37847551703453064, + "step": 589, + "train_speed(iter/s)": 0.017506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 825.0, + "completions/mean_length": 453.4761962890625, + "completions/min_length": 264.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.380859375, + "entropy/min": 0.2177734375, + "epoch": 0.59, + "grad_norm": 1.3803185320621216, + "kl": 0.267578125, + "learning_rate": 7.335261863099651e-07, + "loss": 0.002693876624107361, + "memory(GiB)": 147.2, + "reward": 1.5962520837783813, + "reward_std": 0.34639328718185425, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3232976496219635, + "rewards/EvidenceHallucination/std": 0.44262251257896423, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.5833408832550049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.46968793869018555, + "rewards/VideoAccuracy/std": 0.4826664924621582, + "step": 590, + "train_speed(iter/s)": 0.017508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 805.0, + "completions/mean_length": 466.1190490722656, + "completions/min_length": 305.0, + "entropy/max": 0.458984375, + "entropy/mean": 0.294921875, + "entropy/min": 0.140625, + "epoch": 0.591, + "grad_norm": 1.1045732780872357, + "kl": 0.2431640625, + "learning_rate": 7.304689478399322e-07, + "loss": 0.002461612457409501, + "memory(GiB)": 147.2, + "reward": 2.038516044616699, + "reward_std": 0.10969673097133636, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5281686782836914, + "rewards/EvidenceHallucination/std": 0.4227769374847412, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.5436787009239197, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7328824400901794, + "rewards/VideoAccuracy/std": 0.47139081358909607, + "step": 591, + "train_speed(iter/s)": 0.017516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1060.0, + "completions/mean_length": 469.452392578125, + "completions/min_length": 244.0, + "entropy/max": 1.046875, + "entropy/mean": 0.44140625, + "entropy/min": 0.2353515625, + "epoch": 0.592, + "grad_norm": 1.180903660171026, + "kl": 0.275390625, + "learning_rate": 7.274144235446023e-07, + "loss": 0.0028012755792587996, + "memory(GiB)": 147.2, + "reward": 1.4012137651443481, + "reward_std": 0.2524511516094208, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.25063470005989075, + "rewards/EvidenceHallucination/std": 0.36757490038871765, + "rewards/Evidence_Num_Record/mean": 5.452381134033203, + "rewards/Evidence_Num_Record/std": 2.777982711791992, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.32251545786857605, + "rewards/VideoAccuracy/std": 0.44714128971099854, + "step": 592, + "train_speed(iter/s)": 0.017481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 688.0, + "completions/mean_length": 447.7857360839844, + "completions/min_length": 307.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.408203125, + "entropy/min": 0.28125, + "epoch": 0.593, + "grad_norm": 1.3056742391318676, + "kl": 0.279296875, + "learning_rate": 7.243626441830009e-07, + "loss": 0.002849259879440069, + "memory(GiB)": 147.2, + "reward": 1.611133098602295, + "reward_std": 0.12857817113399506, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.374485045671463, + "rewards/EvidenceHallucination/std": 0.42864927649497986, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.065780758857727, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.48385506868362427, + "rewards/VideoAccuracy/std": 0.4231003522872925, + "step": 593, + "train_speed(iter/s)": 0.017483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 682.0, + "completions/mean_length": 463.4761962890625, + "completions/min_length": 279.0, + "entropy/max": 0.65625, + "entropy/mean": 0.310546875, + "entropy/min": 0.130859375, + "epoch": 0.594, + "grad_norm": 1.1866145801590529, + "kl": 0.265625, + "learning_rate": 7.213136404865123e-07, + "loss": 0.0026950924657285213, + "memory(GiB)": 147.2, + "reward": 2.351012945175171, + "reward_std": 0.2195076197385788, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6800523400306702, + "rewards/EvidenceHallucination/std": 0.3688565790653229, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.051518440246582, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 1.0483359098434448, + "rewards/VideoAccuracy/std": 0.4914064407348633, + "step": 594, + "train_speed(iter/s)": 0.017488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 798.0, + "completions/mean_length": 502.6428527832031, + "completions/min_length": 330.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.41015625, + "entropy/min": 0.1435546875, + "epoch": 0.595, + "grad_norm": 1.0244119066397843, + "kl": 0.2451171875, + "learning_rate": 7.182674431585702e-07, + "loss": 0.0024832345079630613, + "memory(GiB)": 147.2, + "reward": 1.6552233695983887, + "reward_std": 0.21051955223083496, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44998759031295776, + "rewards/EvidenceHallucination/std": 0.46245652437210083, + "rewards/Evidence_Num_Record/mean": 4.785714149475098, + "rewards/Evidence_Num_Record/std": 1.0249501466751099, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.4985591471195221, + "rewards/VideoAccuracy/std": 0.4166490137577057, + "step": 595, + "train_speed(iter/s)": 0.017487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 635.0, + "completions/mean_length": 437.7857360839844, + "completions/min_length": 281.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.416015625, + "entropy/min": 0.259765625, + "epoch": 0.596, + "grad_norm": 1.2185757668145605, + "kl": 0.287109375, + "learning_rate": 7.152240828743477e-07, + "loss": 0.0029028202407062054, + "memory(GiB)": 147.2, + "reward": 1.6317411661148071, + "reward_std": 0.16950979828834534, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39286577701568604, + "rewards/EvidenceHallucination/std": 0.4430376887321472, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.2916690111160278, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.4626917243003845, + "rewards/VideoAccuracy/std": 0.37667131423950195, + "step": 596, + "train_speed(iter/s)": 0.017497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 611.0, + "completions/mean_length": 428.71429443359375, + "completions/min_length": 320.0, + "entropy/max": 0.671875, + "entropy/mean": 0.412109375, + "entropy/min": 0.2431640625, + "epoch": 0.597, + "grad_norm": 1.38619725092822, + "kl": 0.28515625, + "learning_rate": 7.121835902804489e-07, + "loss": 0.002899776678532362, + "memory(GiB)": 147.2, + "reward": 1.8062628507614136, + "reward_std": 0.14738309383392334, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5170978307723999, + "rewards/EvidenceHallucination/std": 0.47136178612709045, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.0339757204055786, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6028432250022888, + "rewards/VideoAccuracy/std": 0.5212418437004089, + "step": 597, + "train_speed(iter/s)": 0.017495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 739.0, + "completions/mean_length": 442.76190185546875, + "completions/min_length": 287.0, + "entropy/max": 0.77734375, + "entropy/mean": 0.33203125, + "entropy/min": 0.11279296875, + "epoch": 0.598, + "grad_norm": 1.255813910891952, + "kl": 0.255859375, + "learning_rate": 7.091459959946009e-07, + "loss": 0.0025937955360859632, + "memory(GiB)": 147.2, + "reward": 1.9070651531219482, + "reward_std": 0.27369827032089233, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49443018436431885, + "rewards/EvidenceHallucination/std": 0.42579492926597595, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.859932780265808, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074110031128, + "rewards/VideoAccuracy/mean": 0.6843696236610413, + "rewards/VideoAccuracy/std": 0.4975552558898926, + "step": 598, + "train_speed(iter/s)": 0.017496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/mean_length": 460.16668701171875, + "completions/min_length": 266.0, + "entropy/max": 2.078125, + "entropy/mean": 0.5390625, + "entropy/min": 0.1728515625, + "epoch": 0.599, + "grad_norm": 1.222683216704578, + "kl": 0.283203125, + "learning_rate": 7.061113306053442e-07, + "loss": 0.0029310905374586582, + "memory(GiB)": 147.2, + "reward": 1.7134166955947876, + "reward_std": 0.2816122770309448, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4839991629123688, + "rewards/EvidenceHallucination/std": 0.459764689207077, + "rewards/Evidence_Num_Record/mean": 4.976190567016602, + "rewards/Evidence_Num_Record/std": 2.4343886375427246, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5499501824378967, + "rewards/VideoAccuracy/std": 0.44302186369895935, + "step": 599, + "train_speed(iter/s)": 0.017486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 579.0, + "completions/mean_length": 437.5476379394531, + "completions/min_length": 295.0, + "entropy/max": 0.5625, + "entropy/mean": 0.4140625, + "entropy/min": 0.2197265625, + "epoch": 0.6, + "grad_norm": 1.1777310155517764, + "kl": 0.263671875, + "learning_rate": 7.030796246717255e-07, + "loss": 0.002670376095920801, + "memory(GiB)": 147.2, + "reward": 1.5067311525344849, + "reward_std": 0.24075977504253387, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2628157436847687, + "rewards/EvidenceHallucination/std": 0.3982740640640259, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.0314452648162842, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.3922632336616516, + "rewards/VideoAccuracy/std": 0.4786408841609955, + "step": 600, + "train_speed(iter/s)": 0.017489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 733.0, + "completions/mean_length": 490.3333435058594, + "completions/min_length": 305.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.259765625, + "entropy/min": 0.11865234375, + "epoch": 0.601, + "grad_norm": 1.0637205709825024, + "kl": 0.23828125, + "learning_rate": 7.000509087229894e-07, + "loss": 0.002422991441562772, + "memory(GiB)": 147.2, + "reward": 2.0810062885284424, + "reward_std": 0.13841350376605988, + "rewards/EvidenceFormat/mean": 0.9523809552192688, + "rewards/EvidenceFormat/std": 0.21554027497768402, + "rewards/EvidenceHallucination/mean": 0.33854150772094727, + "rewards/EvidenceHallucination/std": 0.4618864953517914, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.134661316871643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8809524178504944, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.8609171509742737, + "rewards/VideoAccuracy/std": 0.4651089906692505, + "step": 601, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 748.0, + "completions/mean_length": 488.9761962890625, + "completions/min_length": 307.0, + "entropy/max": 1.015625, + "entropy/mean": 0.53125, + "entropy/min": 0.30859375, + "epoch": 0.602, + "grad_norm": 0.9703941666790854, + "kl": 0.28515625, + "learning_rate": 6.970252132582728e-07, + "loss": 0.0028867856599390507, + "memory(GiB)": 147.2, + "reward": 1.5431602001190186, + "reward_std": 0.09946687519550323, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37068021297454834, + "rewards/EvidenceHallucination/std": 0.40233659744262695, + "rewards/Evidence_Num_Record/mean": 5.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.5522265434265137, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.4404527544975281, + "rewards/VideoAccuracy/std": 0.4748918116092682, + "step": 602, + "train_speed(iter/s)": 0.017466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 673.0, + "completions/mean_length": 407.8333435058594, + "completions/min_length": 288.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.39453125, + "entropy/min": 0.25390625, + "epoch": 0.603, + "grad_norm": 1.2840891486656645, + "kl": 0.291015625, + "learning_rate": 6.940025687462952e-07, + "loss": 0.0029300376772880554, + "memory(GiB)": 147.2, + "reward": 1.7167476415634155, + "reward_std": 0.24148140847682953, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.536012589931488, + "rewards/EvidenceHallucination/std": 0.4745959937572479, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7213357090950012, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.5476404428482056, + "rewards/VideoAccuracy/std": 0.4502407908439636, + "step": 603, + "train_speed(iter/s)": 0.017468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 597.0, + "completions/mean_length": 410.4047546386719, + "completions/min_length": 282.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.341796875, + "entropy/min": 0.1318359375, + "epoch": 0.604, + "grad_norm": 1.3496509722491907, + "kl": 0.3046875, + "learning_rate": 6.909830056250526e-07, + "loss": 0.0030762115493416786, + "memory(GiB)": 147.2, + "reward": 2.4725546836853027, + "reward_std": 0.2338365763425827, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7553823590278625, + "rewards/EvidenceHallucination/std": 0.3370148837566376, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.8406761288642883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 1.1548113822937012, + "rewards/VideoAccuracy/std": 0.5124393701553345, + "step": 604, + "train_speed(iter/s)": 0.017471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1046.0, + "completions/mean_length": 491.21429443359375, + "completions/min_length": 301.0, + "entropy/max": 1.4375, + "entropy/mean": 0.34375, + "entropy/min": 0.1318359375, + "epoch": 0.605, + "grad_norm": 1.0787786489605784, + "kl": 0.2333984375, + "learning_rate": 6.87966554301513e-07, + "loss": 0.0023751643020659685, + "memory(GiB)": 147.2, + "reward": 1.8977985382080078, + "reward_std": 0.2284865379333496, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4612298905849457, + "rewards/EvidenceHallucination/std": 0.41398024559020996, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 1.6028430461883545, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7388858199119568, + "rewards/VideoAccuracy/std": 0.4245954751968384, + "step": 605, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/mean_length": 459.9285888671875, + "completions/min_length": 323.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.439453125, + "entropy/min": 0.310546875, + "epoch": 0.606, + "grad_norm": 1.2847746652791245, + "kl": 0.271484375, + "learning_rate": 6.849532451513073e-07, + "loss": 0.0027352008037269115, + "memory(GiB)": 147.2, + "reward": 1.4930458068847656, + "reward_std": 0.1793939769268036, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21473932266235352, + "rewards/EvidenceHallucination/std": 0.36765056848526, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 0.8873874545097351, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.3500978350639343, + "rewards/VideoAccuracy/std": 0.41116583347320557, + "step": 606, + "train_speed(iter/s)": 0.017463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 566.0, + "completions/mean_length": 421.3095397949219, + "completions/min_length": 280.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.412109375, + "entropy/min": 0.251953125, + "epoch": 0.607, + "grad_norm": 1.2772980962615592, + "kl": 0.2890625, + "learning_rate": 6.819431085184251e-07, + "loss": 0.0029103453271090984, + "memory(GiB)": 147.2, + "reward": 1.8208121061325073, + "reward_std": 0.14132027328014374, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48530933260917664, + "rewards/EvidenceHallucination/std": 0.4333818256855011, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.8025076389312744, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.6285120844841003, + "rewards/VideoAccuracy/std": 0.532863438129425, + "step": 607, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/mean_length": 563.0952758789062, + "completions/min_length": 319.0, + "entropy/max": 1.9296875, + "entropy/mean": 0.330078125, + "entropy/min": 0.126953125, + "epoch": 0.608, + "grad_norm": 0.982542051536595, + "kl": 0.2275390625, + "learning_rate": 6.789361747149092e-07, + "loss": 0.0023674024268984795, + "memory(GiB)": 147.2, + "reward": 1.9754973649978638, + "reward_std": 0.06728312373161316, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37929102778434753, + "rewards/EvidenceHallucination/std": 0.4013298451900482, + "rewards/Evidence_Num_Record/mean": 5.11904764175415, + "rewards/Evidence_Num_Record/std": 2.8982954025268555, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.7758294939994812, + "rewards/VideoAccuracy/std": 0.46641021966934204, + "step": 608, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 780.0, + "completions/mean_length": 469.0952453613281, + "completions/min_length": 280.0, + "entropy/max": 0.953125, + "entropy/mean": 0.4140625, + "entropy/min": 0.2001953125, + "epoch": 0.609, + "grad_norm": 1.3475060514297332, + "kl": 0.271484375, + "learning_rate": 6.759324740205495e-07, + "loss": 0.0027493652887642384, + "memory(GiB)": 147.2, + "reward": 1.913028597831726, + "reward_std": 0.2779674530029297, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6431964635848999, + "rewards/EvidenceHallucination/std": 0.40461066365242004, + "rewards/Evidence_Num_Record/mean": 4.928571701049805, + "rewards/Evidence_Num_Record/std": 1.9049724340438843, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.717722475528717, + "rewards/VideoAccuracy/std": 0.37390825152397156, + "step": 609, + "train_speed(iter/s)": 0.017459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1650.0, + "completions/mean_length": 467.1190490722656, + "completions/min_length": 344.0, + "entropy/max": 0.6796875, + "entropy/mean": 0.419921875, + "entropy/min": 0.2216796875, + "epoch": 0.61, + "grad_norm": 1.2935326073326132, + "kl": 0.283203125, + "learning_rate": 6.729320366825783e-07, + "loss": 0.002889070427045226, + "memory(GiB)": 147.2, + "reward": 1.356255292892456, + "reward_std": 0.21071168780326843, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13856059312820435, + "rewards/EvidenceHallucination/std": 0.31749221682548523, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 3.5196478366851807, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.2618764638900757, + "rewards/VideoAccuracy/std": 0.27847543358802795, + "step": 610, + "train_speed(iter/s)": 0.017455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 825.0, + "completions/mean_length": 515.6190795898438, + "completions/min_length": 291.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.326171875, + "entropy/min": 0.130859375, + "epoch": 0.611, + "grad_norm": 1.2379947419241533, + "kl": 0.2333984375, + "learning_rate": 6.699348929153668e-07, + "loss": 0.002381596015766263, + "memory(GiB)": 147.2, + "reward": 1.9828771352767944, + "reward_std": 0.19084368646144867, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43019285798072815, + "rewards/EvidenceHallucination/std": 0.4468642771244049, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.292343258857727, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9285714626312256, + "rewards/HonestTime/std": 0.26066118478775024, + "rewards/VideoAccuracy/mean": 0.7111242413520813, + "rewards/VideoAccuracy/std": 0.34175026416778564, + "step": 611, + "train_speed(iter/s)": 0.017455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1598.0, + "completions/mean_length": 503.5952453613281, + "completions/min_length": 346.0, + "entropy/max": 1.5546875, + "entropy/mean": 0.484375, + "entropy/min": 0.1650390625, + "epoch": 0.612, + "grad_norm": 1.2842849121027213, + "kl": 0.283203125, + "learning_rate": 6.669410729001193e-07, + "loss": 0.002932178322225809, + "memory(GiB)": 147.2, + "reward": 1.6253175735473633, + "reward_std": 0.32313433289527893, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46873438358306885, + "rewards/EvidenceHallucination/std": 0.4804971516132355, + "rewards/Evidence_Num_Record/mean": 5.333333492279053, + "rewards/Evidence_Num_Record/std": 3.159705400466919, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.4982372522354126, + "rewards/VideoAccuracy/std": 0.4757577180862427, + "step": 612, + "train_speed(iter/s)": 0.01745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 657.0, + "completions/mean_length": 454.952392578125, + "completions/min_length": 294.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.44140625, + "entropy/min": 0.255859375, + "epoch": 0.613, + "grad_norm": 1.3884812004983706, + "kl": 0.283203125, + "learning_rate": 6.639506067845698e-07, + "loss": 0.002851371420547366, + "memory(GiB)": 147.2, + "reward": 1.5651453733444214, + "reward_std": 0.34722527861595154, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40665119886398315, + "rewards/EvidenceHallucination/std": 0.46054762601852417, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.383493185043335, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.4314342439174652, + "rewards/VideoAccuracy/std": 0.4196312427520752, + "step": 613, + "train_speed(iter/s)": 0.017448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 598.0, + "completions/mean_length": 421.4285888671875, + "completions/min_length": 335.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.369140625, + "entropy/min": 0.119140625, + "epoch": 0.614, + "grad_norm": 1.3214791874769356, + "kl": 0.294921875, + "learning_rate": 6.609635246826793e-07, + "loss": 0.002968190936371684, + "memory(GiB)": 147.2, + "reward": 2.231680154800415, + "reward_std": 0.18747715651988983, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6672221422195435, + "rewards/EvidenceHallucination/std": 0.39013221859931946, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.8968262076377869, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9315690994262695, + "rewards/VideoAccuracy/std": 0.45265018939971924, + "step": 614, + "train_speed(iter/s)": 0.01745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/mean_length": 510.71429443359375, + "completions/min_length": 353.0, + "entropy/max": 1.7109375, + "entropy/mean": 0.46875, + "entropy/min": 0.1318359375, + "epoch": 0.615, + "grad_norm": 1.275982360305517, + "kl": 0.251953125, + "learning_rate": 6.579798566743313e-07, + "loss": 0.0025584520772099495, + "memory(GiB)": 147.2, + "reward": 1.7848589420318604, + "reward_std": 0.32896852493286133, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33377861976623535, + "rewards/EvidenceHallucination/std": 0.38658350706100464, + "rewards/Evidence_Num_Record/mean": 5.190476417541504, + "rewards/Evidence_Num_Record/std": 1.4523000717163086, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.6561984419822693, + "rewards/VideoAccuracy/std": 0.45942410826683044, + "step": 615, + "train_speed(iter/s)": 0.017444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 628.0, + "completions/mean_length": 447.5714416503906, + "completions/min_length": 317.0, + "entropy/max": 0.7265625, + "entropy/mean": 0.4375, + "entropy/min": 0.283203125, + "epoch": 0.616, + "grad_norm": 1.1098779874054865, + "kl": 0.279296875, + "learning_rate": 6.549996328050296e-07, + "loss": 0.0028115222230553627, + "memory(GiB)": 147.2, + "reward": 1.5975712537765503, + "reward_std": 0.13794991374015808, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37659451365470886, + "rewards/EvidenceHallucination/std": 0.4494171142578125, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.137727975845337, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.42225220799446106, + "rewards/VideoAccuracy/std": 0.38261863589286804, + "step": 616, + "train_speed(iter/s)": 0.017446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 701.0, + "completions/mean_length": 444.21429443359375, + "completions/min_length": 310.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.4140625, + "entropy/min": 0.248046875, + "epoch": 0.617, + "grad_norm": 1.3243024689622669, + "kl": 0.298828125, + "learning_rate": 6.52022883085595e-07, + "loss": 0.0030175037682056427, + "memory(GiB)": 147.2, + "reward": 1.6268917322158813, + "reward_std": 0.39017921686172485, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.266214519739151, + "rewards/EvidenceHallucination/std": 0.4012203812599182, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 1.0820035934448242, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.47841063141822815, + "rewards/VideoAccuracy/std": 0.5621667504310608, + "step": 617, + "train_speed(iter/s)": 0.017444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/mean_length": 506.6428527832031, + "completions/min_length": 321.0, + "entropy/max": 2.234375, + "entropy/mean": 0.384765625, + "entropy/min": 0.12890625, + "epoch": 0.618, + "grad_norm": 1.0431495374899582, + "kl": 0.2373046875, + "learning_rate": 6.490496374918646e-07, + "loss": 0.002435671165585518, + "memory(GiB)": 147.2, + "reward": 1.9094020128250122, + "reward_std": 0.16423994302749634, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3320138156414032, + "rewards/EvidenceHallucination/std": 0.43036186695098877, + "rewards/Evidence_Num_Record/mean": 4.904761791229248, + "rewards/Evidence_Num_Record/std": 2.8353991508483887, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7096660137176514, + "rewards/VideoAccuracy/std": 0.5061018466949463, + "step": 618, + "train_speed(iter/s)": 0.017446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 833.0, + "completions/mean_length": 495.8571472167969, + "completions/min_length": 318.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.40625, + "entropy/min": 0.2421875, + "epoch": 0.619, + "grad_norm": 0.8913717456101545, + "kl": 0.27734375, + "learning_rate": 6.460799259643883e-07, + "loss": 0.0028232461772859097, + "memory(GiB)": 147.2, + "reward": 1.402701497077942, + "reward_std": 0.1259581446647644, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24587325751781464, + "rewards/EvidenceHallucination/std": 0.39733564853668213, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 1.4979660511016846, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.29162219166755676, + "rewards/VideoAccuracy/std": 0.3946917951107025, + "step": 619, + "train_speed(iter/s)": 0.017446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 644.0, + "completions/mean_length": 408.73809814453125, + "completions/min_length": 268.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.419921875, + "entropy/min": 0.25390625, + "epoch": 0.62, + "grad_norm": 1.4400826272655762, + "kl": 0.294921875, + "learning_rate": 6.431137784081281e-07, + "loss": 0.002960315439850092, + "memory(GiB)": 147.2, + "reward": 1.6624799966812134, + "reward_std": 0.2637644112110138, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30846282839775085, + "rewards/EvidenceHallucination/std": 0.3980914354324341, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.0215448141098022, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5388826727867126, + "rewards/VideoAccuracy/std": 0.5325735211372375, + "step": 620, + "train_speed(iter/s)": 0.01745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 834.0, + "completions/mean_length": 520.90478515625, + "completions/min_length": 335.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.30859375, + "entropy/min": 0.14453125, + "epoch": 0.621, + "grad_norm": 1.0983362750381223, + "kl": 0.2451171875, + "learning_rate": 6.401512246921575e-07, + "loss": 0.0024788822047412395, + "memory(GiB)": 147.2, + "reward": 1.777593731880188, + "reward_std": 0.09118415415287018, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14555399119853973, + "rewards/EvidenceHallucination/std": 0.3323424160480499, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 0.8006965517997742, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9047619104385376, + "rewards/HonestTime/std": 0.297101765871048, + "rewards/VideoAccuracy/mean": 0.5675305128097534, + "rewards/VideoAccuracy/std": 0.3709167242050171, + "step": 621, + "train_speed(iter/s)": 0.017452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1656.0, + "completions/mean_length": 524.9285888671875, + "completions/min_length": 269.0, + "entropy/max": 1.28125, + "entropy/mean": 0.47265625, + "entropy/min": 0.1416015625, + "epoch": 0.622, + "grad_norm": 1.3260165857487658, + "kl": 0.26953125, + "learning_rate": 6.371922946493591e-07, + "loss": 0.0027844994328916073, + "memory(GiB)": 147.2, + "reward": 1.8846853971481323, + "reward_std": 0.11145021766424179, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5921638011932373, + "rewards/EvidenceHallucination/std": 0.3846372067928314, + "rewards/Evidence_Num_Record/mean": 6.142857074737549, + "rewards/Evidence_Num_Record/std": 4.170578956604004, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7329192757606506, + "rewards/VideoAccuracy/std": 0.39959076046943665, + "step": 622, + "train_speed(iter/s)": 0.017447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2057.0, + "completions/mean_length": 524.1666870117188, + "completions/min_length": 345.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.4140625, + "entropy/min": 0.1123046875, + "epoch": 0.623, + "grad_norm": 1.1080644567091926, + "kl": 0.271484375, + "learning_rate": 6.342370180761255e-07, + "loss": 0.0028053666464984417, + "memory(GiB)": 147.2, + "reward": 1.4755423069000244, + "reward_std": 0.21746203303337097, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3213748037815094, + "rewards/EvidenceHallucination/std": 0.44025641679763794, + "rewards/Evidence_Num_Record/mean": 5.214285850524902, + "rewards/Evidence_Num_Record/std": 4.093634128570557, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.34936246275901794, + "rewards/VideoAccuracy/std": 0.35842636227607727, + "step": 623, + "train_speed(iter/s)": 0.017442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/mean_length": 449.2857360839844, + "completions/min_length": 349.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.388671875, + "entropy/min": 0.166015625, + "epoch": 0.624, + "grad_norm": 1.249943430402762, + "kl": 0.279296875, + "learning_rate": 6.312854247320594e-07, + "loss": 0.002804760355502367, + "memory(GiB)": 147.2, + "reward": 1.978733777999878, + "reward_std": 0.21570143103599548, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3235350251197815, + "rewards/EvidenceHallucination/std": 0.423623651266098, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.0744633674621582, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7473601698875427, + "rewards/VideoAccuracy/std": 0.5739562511444092, + "step": 624, + "train_speed(iter/s)": 0.017444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/mean_length": 480.69049072265625, + "completions/min_length": 324.0, + "entropy/max": 1.59375, + "entropy/mean": 0.515625, + "entropy/min": 0.12109375, + "epoch": 0.625, + "grad_norm": 0.7677987436654703, + "kl": 0.255859375, + "learning_rate": 6.283375443396726e-07, + "loss": 0.0026296665892004967, + "memory(GiB)": 147.2, + "reward": 1.5685820579528809, + "reward_std": 0.06617039442062378, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1186329573392868, + "rewards/EvidenceHallucination/std": 0.27648842334747314, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 2.921743631362915, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.48295068740844727, + "rewards/VideoAccuracy/std": 0.5402042865753174, + "step": 625, + "train_speed(iter/s)": 0.017442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 677.0, + "completions/mean_length": 476.1428527832031, + "completions/min_length": 313.0, + "entropy/max": 0.94921875, + "entropy/mean": 0.49609375, + "entropy/min": 0.3515625, + "epoch": 0.626, + "grad_norm": 1.4090372368111226, + "kl": 0.265625, + "learning_rate": 6.253934065840879e-07, + "loss": 0.0027147873770445585, + "memory(GiB)": 147.2, + "reward": 1.6841174364089966, + "reward_std": 0.2543501853942871, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3653638958930969, + "rewards/EvidenceHallucination/std": 0.4579576551914215, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.0135550498962402, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5476190447807312, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.5015208721160889, + "rewards/VideoAccuracy/std": 0.39004263281822205, + "step": 626, + "train_speed(iter/s)": 0.017447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 895.0, + "completions/mean_length": 462.8095397949219, + "completions/min_length": 345.0, + "entropy/max": 0.71875, + "entropy/mean": 0.421875, + "entropy/min": 0.2578125, + "epoch": 0.627, + "grad_norm": 1.2906780037915064, + "kl": 0.275390625, + "learning_rate": 6.224530411127402e-07, + "loss": 0.0027889562770724297, + "memory(GiB)": 147.2, + "reward": 1.4440864324569702, + "reward_std": 0.21193701028823853, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20968188345432281, + "rewards/EvidenceHallucination/std": 0.385572224855423, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.4337884187698364, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.30215004086494446, + "rewards/VideoAccuracy/std": 0.45778700709342957, + "step": 627, + "train_speed(iter/s)": 0.017454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 806.0, + "completions/mean_length": 454.3571472167969, + "completions/min_length": 343.0, + "entropy/max": 1.7734375, + "entropy/mean": 0.435546875, + "entropy/min": 0.1376953125, + "epoch": 0.628, + "grad_norm": 1.1677469239179248, + "kl": 0.251953125, + "learning_rate": 6.19516477535077e-07, + "loss": 0.002543492242693901, + "memory(GiB)": 147.2, + "reward": 1.5812627077102661, + "reward_std": 0.26526641845703125, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14464905858039856, + "rewards/EvidenceHallucination/std": 0.3285929560661316, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.797533392906189, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.4285232424736023, + "rewards/VideoAccuracy/std": 0.4419552981853485, + "step": 628, + "train_speed(iter/s)": 0.017464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 628.0, + "completions/mean_length": 448.0, + "completions/min_length": 267.0, + "entropy/max": 1.0859375, + "entropy/mean": 0.51171875, + "entropy/min": 0.2890625, + "epoch": 0.629, + "grad_norm": 1.4038832123707443, + "kl": 0.279296875, + "learning_rate": 6.165837454222607e-07, + "loss": 0.0028509548865258694, + "memory(GiB)": 147.2, + "reward": 1.949292540550232, + "reward_std": 0.18202029168605804, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6369680762290955, + "rewards/EvidenceHallucination/std": 0.3774639964103699, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 0.9169965386390686, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.76475590467453, + "rewards/VideoAccuracy/std": 0.36503830552101135, + "step": 629, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1668.0, + "completions/mean_length": 486.3095397949219, + "completions/min_length": 334.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.419921875, + "entropy/min": 0.1318359375, + "epoch": 0.63, + "grad_norm": 1.0980329531771842, + "kl": 0.267578125, + "learning_rate": 6.136548743068712e-07, + "loss": 0.0029643033631145954, + "memory(GiB)": 147.2, + "reward": 1.3012635707855225, + "reward_std": 0.21980595588684082, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14249320328235626, + "rewards/EvidenceHallucination/std": 0.3265673518180847, + "rewards/Evidence_Num_Record/mean": 5.261904716491699, + "rewards/Evidence_Num_Record/std": 3.3645172119140625, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.17276491224765778, + "rewards/VideoAccuracy/std": 0.3082710802555084, + "step": 630, + "train_speed(iter/s)": 0.017462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 739.0, + "completions/mean_length": 486.3333435058594, + "completions/min_length": 362.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.3359375, + "entropy/min": 0.1796875, + "epoch": 0.631, + "grad_norm": 1.171876469327531, + "kl": 0.251953125, + "learning_rate": 6.107298936826086e-07, + "loss": 0.002529977587983012, + "memory(GiB)": 147.2, + "reward": 1.9414401054382324, + "reward_std": 0.07969477772712708, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32001227140426636, + "rewards/EvidenceHallucination/std": 0.43706533312797546, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.8025076985359192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6774377226829529, + "rewards/VideoAccuracy/std": 0.5147029161453247, + "step": 631, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 917.0, + "completions/mean_length": 503.3333435058594, + "completions/min_length": 383.0, + "entropy/max": 1.0546875, + "entropy/mean": 0.455078125, + "entropy/min": 0.24609375, + "epoch": 0.632, + "grad_norm": 1.2356169279271694, + "kl": 0.283203125, + "learning_rate": 6.078088330039944e-07, + "loss": 0.002893433440476656, + "memory(GiB)": 147.2, + "reward": 1.5653852224349976, + "reward_std": 0.27202343940734863, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3666588068008423, + "rewards/EvidenceHallucination/std": 0.4319332540035248, + "rewards/Evidence_Num_Record/mean": 5.1666669845581055, + "rewards/Evidence_Num_Record/std": 1.4468917846679688, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.46348196268081665, + "rewards/VideoAccuracy/std": 0.4616753160953522, + "step": 632, + "train_speed(iter/s)": 0.017473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 603.0, + "completions/mean_length": 472.1190490722656, + "completions/min_length": 356.0, + "entropy/max": 0.7890625, + "entropy/mean": 0.447265625, + "entropy/min": 0.287109375, + "epoch": 0.633, + "grad_norm": 1.4942046208513473, + "kl": 0.255859375, + "learning_rate": 6.04891721686078e-07, + "loss": 0.002583180321380496, + "memory(GiB)": 147.2, + "reward": 1.4299159049987793, + "reward_std": 0.4242191016674042, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430335700511932, + "rewards/EvidenceHallucination/mean": 0.2303914874792099, + "rewards/EvidenceHallucination/std": 0.399456262588501, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.0969284772872925, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4047619104385376, + "rewards/HonestTime/std": 0.49679577350616455, + "rewards/VideoAccuracy/mean": 0.3147900402545929, + "rewards/VideoAccuracy/std": 0.35890620946884155, + "step": 633, + "train_speed(iter/s)": 0.017474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 671.0, + "completions/mean_length": 444.1428527832031, + "completions/min_length": 328.0, + "entropy/max": 0.443359375, + "entropy/mean": 0.357421875, + "entropy/min": 0.1787109375, + "epoch": 0.634, + "grad_norm": 1.2530109926746207, + "kl": 0.294921875, + "learning_rate": 6.01978589104138e-07, + "loss": 0.002966479165479541, + "memory(GiB)": 147.2, + "reward": 2.078141689300537, + "reward_std": 0.14186517894268036, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4167763590812683, + "rewards/EvidenceHallucination/std": 0.42956098914146423, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.6043781638145447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8281196355819702, + "rewards/VideoAccuracy/std": 0.7123571038246155, + "step": 634, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 753.0, + "completions/mean_length": 447.19049072265625, + "completions/min_length": 326.0, + "entropy/max": 1.28125, + "entropy/mean": 0.455078125, + "entropy/min": 0.12451171875, + "epoch": 0.635, + "grad_norm": 1.197780254249038, + "kl": 0.263671875, + "learning_rate": 5.990694645933865e-07, + "loss": 0.002655822318047285, + "memory(GiB)": 147.2, + "reward": 1.9410308599472046, + "reward_std": 0.12248065322637558, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5052928328514099, + "rewards/EvidenceHallucination/std": 0.4558258354663849, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.3139561414718628, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7733056545257568, + "rewards/VideoAccuracy/std": 0.47066211700439453, + "step": 635, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 610.0, + "completions/mean_length": 411.71429443359375, + "completions/min_length": 237.0, + "entropy/max": 0.578125, + "entropy/mean": 0.408203125, + "entropy/min": 0.2421875, + "epoch": 0.636, + "grad_norm": 1.3596745525669296, + "kl": 0.28515625, + "learning_rate": 5.961643774486753e-07, + "loss": 0.0028728009201586246, + "memory(GiB)": 147.2, + "reward": 1.8870395421981812, + "reward_std": 0.24997267127037048, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5736715793609619, + "rewards/EvidenceHallucination/std": 0.4410237669944763, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.1129581928253174, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.6437338590621948, + "rewards/VideoAccuracy/std": 0.3046031892299652, + "step": 636, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 576.0, + "completions/mean_length": 419.3571472167969, + "completions/min_length": 286.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.392578125, + "entropy/min": 0.25, + "epoch": 0.637, + "grad_norm": 1.371929928590567, + "kl": 0.279296875, + "learning_rate": 5.932633569241999e-07, + "loss": 0.0028081792406737804, + "memory(GiB)": 147.2, + "reward": 2.0611584186553955, + "reward_std": 0.25506219267845154, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5297446846961975, + "rewards/EvidenceHallucination/std": 0.43175989389419556, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.8968262076377869, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8552095293998718, + "rewards/VideoAccuracy/std": 0.6414136290550232, + "step": 637, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 580.0, + "completions/mean_length": 450.0476379394531, + "completions/min_length": 311.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.341796875, + "entropy/min": 0.1494140625, + "epoch": 0.638, + "grad_norm": 1.2919471704825667, + "kl": 0.25390625, + "learning_rate": 5.903664322332047e-07, + "loss": 0.0025535766035318375, + "memory(GiB)": 147.2, + "reward": 2.08666729927063, + "reward_std": 0.269299179315567, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4039100110530853, + "rewards/EvidenceHallucination/std": 0.458482027053833, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.9699312448501587, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.8820759057998657, + "rewards/VideoAccuracy/std": 0.46471455693244934, + "step": 638, + "train_speed(iter/s)": 0.017486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 639.0, + "completions/mean_length": 450.0238037109375, + "completions/min_length": 344.0, + "entropy/max": 0.71875, + "entropy/mean": 0.44921875, + "entropy/min": 0.310546875, + "epoch": 0.639, + "grad_norm": 1.3534914088870265, + "kl": 0.2734375, + "learning_rate": 5.874736325476889e-07, + "loss": 0.002750057727098465, + "memory(GiB)": 147.2, + "reward": 1.7882862091064453, + "reward_std": 0.10772984474897385, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49877476692199707, + "rewards/EvidenceHallucination/std": 0.44282642006874084, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.7904776334762573, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.6266263127326965, + "rewards/VideoAccuracy/std": 0.447672963142395, + "step": 639, + "train_speed(iter/s)": 0.017481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 687.0, + "completions/mean_length": 423.0, + "completions/min_length": 298.0, + "entropy/max": 0.71875, + "entropy/mean": 0.416015625, + "entropy/min": 0.2890625, + "epoch": 0.64, + "grad_norm": 1.288198381472419, + "kl": 0.29296875, + "learning_rate": 5.845849869981136e-07, + "loss": 0.0029476440977305174, + "memory(GiB)": 147.2, + "reward": 1.497715711593628, + "reward_std": 0.15667381882667542, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16764487326145172, + "rewards/EvidenceHallucination/std": 0.3352504074573517, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.8249872326850891, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.3641866147518158, + "rewards/VideoAccuracy/std": 0.5057440996170044, + "step": 640, + "train_speed(iter/s)": 0.017483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 661.0, + "completions/mean_length": 459.3095397949219, + "completions/min_length": 283.0, + "entropy/max": 0.515625, + "entropy/mean": 0.296875, + "entropy/min": 0.1357421875, + "epoch": 0.641, + "grad_norm": 1.1413185333078562, + "kl": 0.255859375, + "learning_rate": 5.817005246731073e-07, + "loss": 0.0025705297011882067, + "memory(GiB)": 147.2, + "reward": 2.4359419345855713, + "reward_std": 0.122245192527771, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6870543956756592, + "rewards/EvidenceHallucination/std": 0.35847052931785583, + "rewards/Evidence_Num_Record/mean": 3.809523820877075, + "rewards/Evidence_Num_Record/std": 0.706696093082428, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9285714626312256, + "rewards/HonestTime/std": 0.26066118478775024, + "rewards/VideoAccuracy/mean": 1.1128168106079102, + "rewards/VideoAccuracy/std": 0.4972495436668396, + "step": 641, + "train_speed(iter/s)": 0.017483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1661.0, + "completions/mean_length": 457.952392578125, + "completions/min_length": 308.0, + "entropy/max": 1.5703125, + "entropy/mean": 0.4921875, + "entropy/min": 0.1220703125, + "epoch": 0.642, + "grad_norm": 1.4330868757528463, + "kl": 0.291015625, + "learning_rate": 5.788202746191734e-07, + "loss": 0.003061532974243164, + "memory(GiB)": 147.2, + "reward": 1.9305227994918823, + "reward_std": 0.23124870657920837, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6630523204803467, + "rewards/EvidenceHallucination/std": 0.3898063004016876, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 4.09044075012207, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7645788788795471, + "rewards/VideoAccuracy/std": 0.39655032753944397, + "step": 642, + "train_speed(iter/s)": 0.017481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 654.0, + "completions/mean_length": 446.0952453613281, + "completions/min_length": 252.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.4140625, + "entropy/min": 0.23828125, + "epoch": 0.643, + "grad_norm": 1.11793454116198, + "kl": 0.2578125, + "learning_rate": 5.759442658403985e-07, + "loss": 0.002604592591524124, + "memory(GiB)": 147.2, + "reward": 1.504928469657898, + "reward_std": 0.1708807647228241, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2890865206718445, + "rewards/EvidenceHallucination/std": 0.41946476697921753, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.0493069887161255, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.38044440746307373, + "rewards/VideoAccuracy/std": 0.41669321060180664, + "step": 643, + "train_speed(iter/s)": 0.017481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 684.0, + "completions/mean_length": 455.40478515625, + "completions/min_length": 352.0, + "entropy/max": 0.48046875, + "entropy/mean": 0.361328125, + "entropy/min": 0.177734375, + "epoch": 0.644, + "grad_norm": 1.258093882493503, + "kl": 0.271484375, + "learning_rate": 5.730725272981583e-07, + "loss": 0.0027431691996753216, + "memory(GiB)": 147.2, + "reward": 2.50679087638855, + "reward_std": 0.1791459619998932, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8189927339553833, + "rewards/EvidenceHallucination/std": 0.3137473165988922, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.6043781638145447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 1.1858493089675903, + "rewards/VideoAccuracy/std": 0.4272572696208954, + "step": 644, + "train_speed(iter/s)": 0.017482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 779.0, + "completions/mean_length": 495.66668701171875, + "completions/min_length": 342.0, + "entropy/max": 1.125, + "entropy/mean": 0.474609375, + "entropy/min": 0.1337890625, + "epoch": 0.645, + "grad_norm": 1.0452420162271987, + "kl": 0.267578125, + "learning_rate": 5.702050879108283e-07, + "loss": 0.0027335789054632187, + "memory(GiB)": 147.2, + "reward": 1.7045737504959106, + "reward_std": 0.3052411675453186, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.275814026594162, + "rewards/EvidenceHallucination/std": 0.40204769372940063, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.7550326585769653, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.582744300365448, + "rewards/VideoAccuracy/std": 0.5970212817192078, + "step": 645, + "train_speed(iter/s)": 0.017482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 772.0, + "completions/mean_length": 464.0476379394531, + "completions/min_length": 358.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.44921875, + "entropy/min": 0.30078125, + "epoch": 0.646, + "grad_norm": 1.128949782037839, + "kl": 0.279296875, + "learning_rate": 5.673419765534915e-07, + "loss": 0.0028231206815689802, + "memory(GiB)": 147.2, + "reward": 1.3619728088378906, + "reward_std": 0.24435554444789886, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.16221894323825836, + "rewards/EvidenceHallucination/std": 0.3455430269241333, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 0.9750071167945862, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.23905286192893982, + "rewards/VideoAccuracy/std": 0.28250643610954285, + "step": 646, + "train_speed(iter/s)": 0.01748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 749.0, + "completions/mean_length": 430.0714416503906, + "completions/min_length": 311.0, + "entropy/max": 0.953125, + "entropy/mean": 0.45703125, + "entropy/min": 0.29296875, + "epoch": 0.647, + "grad_norm": 1.3864010257948174, + "kl": 0.2890625, + "learning_rate": 5.644832220576479e-07, + "loss": 0.0029104407876729965, + "memory(GiB)": 147.2, + "reward": 2.0799288749694824, + "reward_std": 0.16969692707061768, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6771750450134277, + "rewards/EvidenceHallucination/std": 0.3804674744606018, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 1.0075210332870483, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.8492557406425476, + "rewards/VideoAccuracy/std": 0.4122805595397949, + "step": 647, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 894.0, + "completions/mean_length": 508.3809509277344, + "completions/min_length": 318.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.384765625, + "entropy/min": 0.158203125, + "epoch": 0.648, + "grad_norm": 1.0362772461187326, + "kl": 0.2451171875, + "learning_rate": 5.616288532109224e-07, + "loss": 0.0024698299821466208, + "memory(GiB)": 147.2, + "reward": 1.8865734338760376, + "reward_std": 0.09468773007392883, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5207627415657043, + "rewards/EvidenceHallucination/std": 0.46173611283302307, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 0.8913052082061768, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6490874290466309, + "rewards/VideoAccuracy/std": 0.387030690908432, + "step": 648, + "train_speed(iter/s)": 0.017484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 678.0, + "completions/mean_length": 466.8333435058594, + "completions/min_length": 344.0, + "entropy/max": 1.1640625, + "entropy/mean": 0.50390625, + "entropy/min": 0.275390625, + "epoch": 0.649, + "grad_norm": 1.3311556860146998, + "kl": 0.279296875, + "learning_rate": 5.587788987567785e-07, + "loss": 0.002829805016517639, + "memory(GiB)": 147.2, + "reward": 1.5995934009552002, + "reward_std": 0.3568987548351288, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3610594570636749, + "rewards/EvidenceHallucination/std": 0.42795902490615845, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.2540295124053955, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.4654768109321594, + "rewards/VideoAccuracy/std": 0.43665531277656555, + "step": 649, + "train_speed(iter/s)": 0.017492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 562.0, + "completions/mean_length": 437.2857360839844, + "completions/min_length": 347.0, + "entropy/max": 0.640625, + "entropy/mean": 0.435546875, + "entropy/min": 0.306640625, + "epoch": 0.65, + "grad_norm": 1.2377606703225692, + "kl": 0.28515625, + "learning_rate": 5.559333873942258e-07, + "loss": 0.0028525185771286488, + "memory(GiB)": 147.2, + "reward": 1.6477826833724976, + "reward_std": 0.21032457053661346, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3635348975658417, + "rewards/EvidenceHallucination/std": 0.416638046503067, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.6625891327857971, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.479837566614151, + "rewards/VideoAccuracy/std": 0.4521355926990509, + "step": 650, + "train_speed(iter/s)": 0.017475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 780.0, + "completions/mean_length": 535.7142944335938, + "completions/min_length": 384.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.30078125, + "entropy/min": 0.130859375, + "epoch": 0.651, + "grad_norm": 1.1056339325086264, + "kl": 0.2275390625, + "learning_rate": 5.530923477775322e-07, + "loss": 0.0022936025634407997, + "memory(GiB)": 147.2, + "reward": 2.013475179672241, + "reward_std": 0.3081175982952118, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33621248602867126, + "rewards/EvidenceHallucination/std": 0.42277342081069946, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.7485952973365784, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.7509944438934326, + "rewards/VideoAccuracy/std": 0.5117350220680237, + "step": 651, + "train_speed(iter/s)": 0.017477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 960.0, + "completions/mean_length": 490.0476379394531, + "completions/min_length": 348.0, + "entropy/max": 2.140625, + "entropy/mean": 0.58984375, + "entropy/min": 0.28125, + "epoch": 0.652, + "grad_norm": 0.8452958423486069, + "kl": 0.271484375, + "learning_rate": 5.502558085159344e-07, + "loss": 0.0027937653940171003, + "memory(GiB)": 147.2, + "reward": 1.9492918252944946, + "reward_std": 0.09397362172603607, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.0, + "rewards/EvidenceHallucination/std": 0.0, + "rewards/Evidence_Num_Record/mean": 5.261904716491699, + "rewards/Evidence_Num_Record/std": 1.9638131856918335, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.91595858335495, + "rewards/VideoAccuracy/std": 0.2248149961233139, + "step": 652, + "train_speed(iter/s)": 0.017472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 721.0, + "completions/mean_length": 472.3571472167969, + "completions/min_length": 263.0, + "entropy/max": 0.671875, + "entropy/mean": 0.431640625, + "entropy/min": 0.291015625, + "epoch": 0.653, + "grad_norm": 1.4113041808354707, + "kl": 0.259765625, + "learning_rate": 5.47423798173352e-07, + "loss": 0.0026171018835157156, + "memory(GiB)": 147.2, + "reward": 1.7929911613464355, + "reward_std": 0.17371951043605804, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4915204346179962, + "rewards/EvidenceHallucination/std": 0.4581449031829834, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.5809552669525146, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.6375442147254944, + "rewards/VideoAccuracy/std": 0.3973805904388428, + "step": 653, + "train_speed(iter/s)": 0.017473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 587.0, + "completions/mean_length": 427.5238037109375, + "completions/min_length": 299.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.35546875, + "entropy/min": 0.1748046875, + "epoch": 0.654, + "grad_norm": 1.1434917947803622, + "kl": 0.26953125, + "learning_rate": 5.445963452680973e-07, + "loss": 0.0027125473134219646, + "memory(GiB)": 147.2, + "reward": 2.280250310897827, + "reward_std": 0.14925819635391235, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6781247854232788, + "rewards/EvidenceHallucination/std": 0.41727301478385925, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.6559829115867615, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.9874823689460754, + "rewards/VideoAccuracy/std": 0.49428799748420715, + "step": 654, + "train_speed(iter/s)": 0.017478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/mean_length": 491.7857360839844, + "completions/min_length": 320.0, + "entropy/max": 1.6640625, + "entropy/mean": 0.484375, + "entropy/min": 0.1474609375, + "epoch": 0.655, + "grad_norm": 1.0853257983035267, + "kl": 0.25, + "learning_rate": 5.417734782725896e-07, + "loss": 0.002571134828031063, + "memory(GiB)": 147.2, + "reward": 1.7250261306762695, + "reward_std": 0.27739259600639343, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20502276718616486, + "rewards/EvidenceHallucination/std": 0.3402935564517975, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 2.2959086894989014, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6173549294471741, + "rewards/VideoAccuracy/std": 0.5183524489402771, + "step": 655, + "train_speed(iter/s)": 0.017475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 594.0, + "completions/mean_length": 444.0238037109375, + "completions/min_length": 302.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.41015625, + "entropy/min": 0.28125, + "epoch": 0.656, + "grad_norm": 1.246969051558, + "kl": 0.2890625, + "learning_rate": 5.389552256130689e-07, + "loss": 0.0029038935899734497, + "memory(GiB)": 147.2, + "reward": 1.62190580368042, + "reward_std": 0.21904978156089783, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40573424100875854, + "rewards/EvidenceHallucination/std": 0.45892637968063354, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 0.8890219330787659, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.41694924235343933, + "rewards/VideoAccuracy/std": 0.3788967728614807, + "step": 656, + "train_speed(iter/s)": 0.017475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 530.0, + "completions/mean_length": 429.9285888671875, + "completions/min_length": 299.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.43359375, + "entropy/min": 0.3125, + "epoch": 0.657, + "grad_norm": 1.2243096462731131, + "kl": 0.287109375, + "learning_rate": 5.361416156693075e-07, + "loss": 0.0028951808344572783, + "memory(GiB)": 147.2, + "reward": 1.3817565441131592, + "reward_std": 0.17917168140411377, + "rewards/EvidenceFormat/mean": 0.9523809552192688, + "rewards/EvidenceFormat/std": 0.21554026007652283, + "rewards/EvidenceHallucination/mean": 0.19264882802963257, + "rewards/EvidenceHallucination/std": 0.37777388095855713, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.143582820892334, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.2717982530593872, + "rewards/VideoAccuracy/std": 0.37504634261131287, + "step": 657, + "train_speed(iter/s)": 0.017487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 706.0, + "completions/mean_length": 508.1428527832031, + "completions/min_length": 322.0, + "entropy/max": 1.5859375, + "entropy/mean": 0.443359375, + "entropy/min": 0.17578125, + "epoch": 0.658, + "grad_norm": 1.095017114903171, + "kl": 0.2294921875, + "learning_rate": 5.333326767743263e-07, + "loss": 0.0023005008697509766, + "memory(GiB)": 147.2, + "reward": 2.0157835483551025, + "reward_std": 0.2706416845321655, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6202226877212524, + "rewards/EvidenceHallucination/std": 0.38593390583992004, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.2484601736068726, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.7631673216819763, + "rewards/VideoAccuracy/std": 0.3701137602329254, + "step": 658, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 534.0, + "completions/mean_length": 445.3333435058594, + "completions/min_length": 349.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.451171875, + "entropy/min": 0.29296875, + "epoch": 0.659, + "grad_norm": 1.3566480574860367, + "kl": 0.279296875, + "learning_rate": 5.305284372141095e-07, + "loss": 0.002823568880558014, + "memory(GiB)": 147.2, + "reward": 1.701953411102295, + "reward_std": 0.31828799843788147, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4799630343914032, + "rewards/EvidenceHallucination/std": 0.4651428461074829, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 0.8239306807518005, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5440560579299927, + "rewards/VideoAccuracy/std": 0.43683817982673645, + "step": 659, + "train_speed(iter/s)": 0.017487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 929.0, + "completions/mean_length": 471.69049072265625, + "completions/min_length": 317.0, + "entropy/max": 0.6953125, + "entropy/mean": 0.451171875, + "entropy/min": 0.310546875, + "epoch": 0.66, + "grad_norm": 1.2157016101281084, + "kl": 0.2421875, + "learning_rate": 5.277289252273174e-07, + "loss": 0.002447072882205248, + "memory(GiB)": 147.2, + "reward": 1.294893503189087, + "reward_std": 0.20156048238277435, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.13871105015277863, + "rewards/EvidenceHallucination/std": 0.3208237886428833, + "rewards/Evidence_Num_Record/mean": 4.642857074737549, + "rewards/Evidence_Num_Record/std": 1.935816764831543, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.20048469305038452, + "rewards/VideoAccuracy/std": 0.31244516372680664, + "step": 660, + "train_speed(iter/s)": 0.017485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 704.0, + "completions/mean_length": 496.0714416503906, + "completions/min_length": 346.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.333984375, + "entropy/min": 0.1650390625, + "epoch": 0.661, + "grad_norm": 0.9492405219351743, + "kl": 0.234375, + "learning_rate": 5.249341690050051e-07, + "loss": 0.002568890806287527, + "memory(GiB)": 147.2, + "reward": 1.9744826555252075, + "reward_std": 0.18958406150341034, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35581013560295105, + "rewards/EvidenceHallucination/std": 0.42360445857048035, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.6398497223854065, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.7080825567245483, + "rewards/VideoAccuracy/std": 0.6018710732460022, + "step": 661, + "train_speed(iter/s)": 0.017487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 763.0, + "completions/mean_length": 495.6428527832031, + "completions/min_length": 320.0, + "entropy/max": 1.3125, + "entropy/mean": 0.59375, + "entropy/min": 0.29296875, + "epoch": 0.662, + "grad_norm": 1.2389370789230514, + "kl": 0.287109375, + "learning_rate": 5.22144196690337e-07, + "loss": 0.0029177777469158173, + "memory(GiB)": 147.2, + "reward": 1.6555906534194946, + "reward_std": 0.21660292148590088, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45820945501327515, + "rewards/EvidenceHallucination/std": 0.39233002066612244, + "rewards/Evidence_Num_Record/mean": 5.214285850524902, + "rewards/Evidence_Num_Record/std": 1.6160130500793457, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.5401391386985779, + "rewards/VideoAccuracy/std": 0.4661349058151245, + "step": 662, + "train_speed(iter/s)": 0.017488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 633.0, + "completions/mean_length": 477.0, + "completions/min_length": 274.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.462890625, + "entropy/min": 0.2734375, + "epoch": 0.663, + "grad_norm": 1.455002442545534, + "kl": 0.25390625, + "learning_rate": 5.193590363783027e-07, + "loss": 0.0025626281276345253, + "memory(GiB)": 147.2, + "reward": 1.7300691604614258, + "reward_std": 0.2832028269767761, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4435245990753174, + "rewards/EvidenceHallucination/std": 0.42624273896217346, + "rewards/Evidence_Num_Record/mean": 4.785714149475098, + "rewards/Evidence_Num_Record/std": 1.4903874397277832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5794594287872314, + "rewards/VideoAccuracy/std": 0.4444909691810608, + "step": 663, + "train_speed(iter/s)": 0.017489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 546.0, + "completions/mean_length": 432.71429443359375, + "completions/min_length": 283.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.37109375, + "entropy/min": 0.19921875, + "epoch": 0.664, + "grad_norm": 1.3961964678083207, + "kl": 0.279296875, + "learning_rate": 5.16578716115436e-07, + "loss": 0.0028040807228535414, + "memory(GiB)": 147.2, + "reward": 2.2781004905700684, + "reward_std": 0.18304118514060974, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7079420685768127, + "rewards/EvidenceHallucination/std": 0.38466620445251465, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.6005223989486694, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9746074080467224, + "rewards/VideoAccuracy/std": 0.4673185646533966, + "step": 664, + "train_speed(iter/s)": 0.017491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/mean_length": 520.6904907226562, + "completions/min_length": 393.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.365234375, + "entropy/min": 0.138671875, + "epoch": 0.665, + "grad_norm": 1.066138602840599, + "kl": 0.234375, + "learning_rate": 5.138032638995315e-07, + "loss": 0.002420675940811634, + "memory(GiB)": 147.2, + "reward": 2.034099578857422, + "reward_std": 0.2014666646718979, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49243220686912537, + "rewards/EvidenceHallucination/std": 0.47104111313819885, + "rewards/Evidence_Num_Record/mean": 5.238095283508301, + "rewards/Evidence_Num_Record/std": 2.721275806427002, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8689465522766113, + "rewards/VideoAccuracy/std": 0.36233431100845337, + "step": 665, + "train_speed(iter/s)": 0.017487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 792.0, + "completions/mean_length": 464.8095397949219, + "completions/min_length": 318.0, + "entropy/max": 1.03125, + "entropy/mean": 0.435546875, + "entropy/min": 0.193359375, + "epoch": 0.666, + "grad_norm": 1.089488383391917, + "kl": 0.287109375, + "learning_rate": 5.110327076793612e-07, + "loss": 0.002939028199762106, + "memory(GiB)": 147.2, + "reward": 1.6147891283035278, + "reward_std": 0.17048370838165283, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4267515540122986, + "rewards/EvidenceHallucination/std": 0.44840237498283386, + "rewards/Evidence_Num_Record/mean": 4.761904716491699, + "rewards/Evidence_Num_Record/std": 1.2842293977737427, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.47229599952697754, + "rewards/VideoAccuracy/std": 0.42426127195358276, + "step": 666, + "train_speed(iter/s)": 0.017469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 614.0, + "completions/mean_length": 451.26190185546875, + "completions/min_length": 341.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.43359375, + "entropy/min": 0.31640625, + "epoch": 0.667, + "grad_norm": 1.37364539682683, + "kl": 0.294921875, + "learning_rate": 5.08267075354396e-07, + "loss": 0.0029405485838651657, + "memory(GiB)": 147.2, + "reward": 2.1241273880004883, + "reward_std": 0.2718643546104431, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.697299599647522, + "rewards/EvidenceHallucination/std": 0.40521708130836487, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.148902416229248, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8846674561500549, + "rewards/VideoAccuracy/std": 0.5274243354797363, + "step": 667, + "train_speed(iter/s)": 0.017473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 905.0, + "completions/mean_length": 538.7380981445312, + "completions/min_length": 315.0, + "entropy/max": 1.6171875, + "entropy/mean": 0.380859375, + "entropy/min": 0.134765625, + "epoch": 0.668, + "grad_norm": 0.9613458390599403, + "kl": 0.2275390625, + "learning_rate": 5.055063947745233e-07, + "loss": 0.0023003662936389446, + "memory(GiB)": 147.2, + "reward": 2.047619342803955, + "reward_std": 0.21020303666591644, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29439035058021545, + "rewards/EvidenceHallucination/std": 0.427455335855484, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 0.8692047595977783, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8554080724716187, + "rewards/VideoAccuracy/std": 0.5128820538520813, + "step": 668, + "train_speed(iter/s)": 0.017472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 856.0, + "completions/mean_length": 505.16668701171875, + "completions/min_length": 353.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.404296875, + "entropy/min": 0.267578125, + "epoch": 0.669, + "grad_norm": 1.2010642006938999, + "kl": 0.259765625, + "learning_rate": 5.027506937397652e-07, + "loss": 0.0026324428617954254, + "memory(GiB)": 147.2, + "reward": 1.830915927886963, + "reward_std": 0.20380070805549622, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5770098567008972, + "rewards/EvidenceHallucination/std": 0.4214893579483032, + "rewards/Evidence_Num_Record/mean": 5.38095235824585, + "rewards/Evidence_Num_Record/std": 2.1521670818328857, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.6631327867507935, + "rewards/VideoAccuracy/std": 0.4034560024738312, + "step": 669, + "train_speed(iter/s)": 0.017474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 751.0, + "completions/mean_length": 455.71429443359375, + "completions/min_length": 202.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.458984375, + "entropy/min": 0.28515625, + "epoch": 0.67, + "grad_norm": 1.145327570121584, + "kl": 0.296875, + "learning_rate": 5.000000000000002e-07, + "loss": 0.0029850280843675137, + "memory(GiB)": 147.2, + "reward": 1.5763933658599854, + "reward_std": 0.1424618363380432, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4487646818161011, + "rewards/EvidenceHallucination/std": 0.457754522562027, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.4283100366592407, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.45330697298049927, + "rewards/VideoAccuracy/std": 0.3984241187572479, + "step": 670, + "train_speed(iter/s)": 0.017474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 833.0, + "completions/mean_length": 514.8333129882812, + "completions/min_length": 323.0, + "entropy/max": 0.625, + "entropy/mean": 0.306640625, + "entropy/min": 0.15625, + "epoch": 0.671, + "grad_norm": 1.0521617393984861, + "kl": 0.2216796875, + "learning_rate": 4.972543412546841e-07, + "loss": 0.0022550090216100216, + "memory(GiB)": 147.2, + "reward": 2.2052764892578125, + "reward_std": 0.18084117770195007, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5524303913116455, + "rewards/EvidenceHallucination/std": 0.40628620982170105, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.9865530133247375, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8995524048805237, + "rewards/VideoAccuracy/std": 0.40781137347221375, + "step": 671, + "train_speed(iter/s)": 0.017474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2349.0, + "completions/mean_length": 590.6666870117188, + "completions/min_length": 329.0, + "entropy/max": 1.9453125, + "entropy/mean": 0.478515625, + "entropy/min": 0.1435546875, + "epoch": 0.672, + "grad_norm": 1.0176091140296963, + "kl": 0.2490234375, + "learning_rate": 4.945137451525706e-07, + "loss": 0.002639862708747387, + "memory(GiB)": 147.2, + "reward": 1.3738744258880615, + "reward_std": 0.21433894336223602, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29613929986953735, + "rewards/EvidenceHallucination/std": 0.43096432089805603, + "rewards/Evidence_Num_Record/mean": 6.690476417541504, + "rewards/Evidence_Num_Record/std": 5.019416332244873, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.28607502579689026, + "rewards/VideoAccuracy/std": 0.4201844334602356, + "step": 672, + "train_speed(iter/s)": 0.017464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1103.0, + "completions/mean_length": 545.7857055664062, + "completions/min_length": 350.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.431640625, + "entropy/min": 0.263671875, + "epoch": 0.673, + "grad_norm": 1.0327069583898099, + "kl": 0.2412109375, + "learning_rate": 4.91778239291431e-07, + "loss": 0.002442984376102686, + "memory(GiB)": 147.2, + "reward": 1.2752413749694824, + "reward_std": 0.19907771050930023, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.190188929438591, + "rewards/EvidenceHallucination/std": 0.3720424771308899, + "rewards/Evidence_Num_Record/mean": 5.404761791229248, + "rewards/Evidence_Num_Record/std": 3.485576629638672, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.2038702368736267, + "rewards/VideoAccuracy/std": 0.33589303493499756, + "step": 673, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 566.0, + "completions/mean_length": 445.5714416503906, + "completions/min_length": 342.0, + "entropy/max": 0.53125, + "entropy/mean": 0.34765625, + "entropy/min": 0.169921875, + "epoch": 0.674, + "grad_norm": 1.1536689632597585, + "kl": 0.287109375, + "learning_rate": 4.890478512177795e-07, + "loss": 0.002904066815972328, + "memory(GiB)": 147.2, + "reward": 2.2410874366760254, + "reward_std": 0.1582295447587967, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.653382420539856, + "rewards/EvidenceHallucination/std": 0.40857383608818054, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.8811485767364502, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9485061168670654, + "rewards/VideoAccuracy/std": 0.4866093099117279, + "step": 674, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/mean_length": 520.90478515625, + "completions/min_length": 350.0, + "entropy/max": 1.0859375, + "entropy/mean": 0.49609375, + "entropy/min": 0.2158203125, + "epoch": 0.675, + "grad_norm": 1.2732652889309997, + "kl": 0.251953125, + "learning_rate": 4.863226084265939e-07, + "loss": 0.002592825796455145, + "memory(GiB)": 147.2, + "reward": 2.215259313583374, + "reward_std": 0.0945897102355957, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6706887483596802, + "rewards/EvidenceHallucination/std": 0.3234904706478119, + "rewards/Evidence_Num_Record/mean": 4.904761791229248, + "rewards/Evidence_Num_Record/std": 1.7364039421081543, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 1.014454960823059, + "rewards/VideoAccuracy/std": 0.18321140110492706, + "step": 675, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 824.0, + "completions/mean_length": 464.0714416503906, + "completions/min_length": 290.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.453125, + "entropy/min": 0.263671875, + "epoch": 0.676, + "grad_norm": 1.4533553837831945, + "kl": 0.275390625, + "learning_rate": 4.836025383610382e-07, + "loss": 0.0027627265080809593, + "memory(GiB)": 147.2, + "reward": 1.8260008096694946, + "reward_std": 0.19941721856594086, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6072930693626404, + "rewards/EvidenceHallucination/std": 0.46310192346572876, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.6265795230865479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.637875497341156, + "rewards/VideoAccuracy/std": 0.4270996153354645, + "step": 676, + "train_speed(iter/s)": 0.017467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1144.0, + "completions/mean_length": 484.3809509277344, + "completions/min_length": 367.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.421875, + "entropy/min": 0.1962890625, + "epoch": 0.677, + "grad_norm": 1.1999298873211424, + "kl": 0.279296875, + "learning_rate": 4.808876684121881e-07, + "loss": 0.0028530347626656294, + "memory(GiB)": 147.2, + "reward": 1.7600337266921997, + "reward_std": 0.11497774720191956, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4731876254081726, + "rewards/EvidenceHallucination/std": 0.4828210771083832, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 2.1445987224578857, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008703470230103, + "rewards/VideoAccuracy/mean": 0.579681932926178, + "rewards/VideoAccuracy/std": 0.584205687046051, + "step": 677, + "train_speed(iter/s)": 0.017469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 624.0, + "completions/mean_length": 470.8571472167969, + "completions/min_length": 319.0, + "entropy/max": 1.28125, + "entropy/mean": 0.357421875, + "entropy/min": 0.12353515625, + "epoch": 0.678, + "grad_norm": 1.1855086211602954, + "kl": 0.2392578125, + "learning_rate": 4.781780259187542e-07, + "loss": 0.002429666928946972, + "memory(GiB)": 147.2, + "reward": 2.1749234199523926, + "reward_std": 0.13399794697761536, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6821054220199585, + "rewards/EvidenceHallucination/std": 0.3648480176925659, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.9320714473724365, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.905168890953064, + "rewards/VideoAccuracy/std": 0.2352881133556366, + "step": 678, + "train_speed(iter/s)": 0.01747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 713.0, + "completions/mean_length": 459.71429443359375, + "completions/min_length": 292.0, + "entropy/max": 1.515625, + "entropy/mean": 0.46875, + "entropy/min": 0.3203125, + "epoch": 0.679, + "grad_norm": 1.3761276944643144, + "kl": 0.26171875, + "learning_rate": 4.7547363816680564e-07, + "loss": 0.002636173740029335, + "memory(GiB)": 147.2, + "reward": 1.8883533477783203, + "reward_std": 0.22926215827465057, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6010059118270874, + "rewards/EvidenceHallucination/std": 0.40532511472702026, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.148902416229248, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.7062475085258484, + "rewards/VideoAccuracy/std": 0.39731258153915405, + "step": 679, + "train_speed(iter/s)": 0.017468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1857.0, + "completions/mean_length": 522.1904907226562, + "completions/min_length": 340.0, + "entropy/max": 0.578125, + "entropy/mean": 0.427734375, + "entropy/min": 0.296875, + "epoch": 0.68, + "grad_norm": 1.2514499961252392, + "kl": 0.26171875, + "learning_rate": 4.727745323894975e-07, + "loss": 0.0026679779402911663, + "memory(GiB)": 147.2, + "reward": 1.598029375076294, + "reward_std": 0.24943912029266357, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3181222677230835, + "rewards/EvidenceHallucination/std": 0.4199323356151581, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 1.9666203260421753, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.47250017523765564, + "rewards/VideoAccuracy/std": 0.5534753203392029, + "step": 680, + "train_speed(iter/s)": 0.017471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 784.0, + "completions/mean_length": 504.0952453613281, + "completions/min_length": 330.0, + "entropy/max": 0.484375, + "entropy/mean": 0.283203125, + "entropy/min": 0.1337890625, + "epoch": 0.681, + "grad_norm": 1.0238181065161005, + "kl": 0.2275390625, + "learning_rate": 4.700807357667952e-07, + "loss": 0.0023045698180794716, + "memory(GiB)": 147.2, + "reward": 2.1714138984680176, + "reward_std": 0.05831073969602585, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49484798312187195, + "rewards/EvidenceHallucination/std": 0.41957953572273254, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.9084069728851318, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8724439740180969, + "rewards/VideoAccuracy/std": 0.4067482352256775, + "step": 681, + "train_speed(iter/s)": 0.017472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/mean_length": 573.4761962890625, + "completions/min_length": 379.0, + "entropy/max": 1.7265625, + "entropy/mean": 0.4609375, + "entropy/min": 0.1962890625, + "epoch": 0.682, + "grad_norm": 0.9955981566865121, + "kl": 0.251953125, + "learning_rate": 4.673922754252001e-07, + "loss": 0.002564347116276622, + "memory(GiB)": 147.2, + "reward": 1.5866285562515259, + "reward_std": 0.14283575117588043, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3923703134059906, + "rewards/EvidenceHallucination/std": 0.4100193977355957, + "rewards/Evidence_Num_Record/mean": 6.0714287757873535, + "rewards/Evidence_Num_Record/std": 2.699528932571411, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.48434481024742126, + "rewards/VideoAccuracy/std": 0.4688479006290436, + "step": 682, + "train_speed(iter/s)": 0.017467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1168.0, + "completions/mean_length": 481.21429443359375, + "completions/min_length": 286.0, + "entropy/max": 0.79296875, + "entropy/mean": 0.427734375, + "entropy/min": 0.12890625, + "epoch": 0.683, + "grad_norm": 1.4492171218624827, + "kl": 0.259765625, + "learning_rate": 4.6470917843747845e-07, + "loss": 0.0026228614151477814, + "memory(GiB)": 147.2, + "reward": 1.453622579574585, + "reward_std": 0.37589478492736816, + "rewards/EvidenceFormat/mean": 0.9523809552192688, + "rewards/EvidenceFormat/std": 0.21554027497768402, + "rewards/EvidenceHallucination/mean": 0.30742937326431274, + "rewards/EvidenceHallucination/std": 0.4408752918243408, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.4010117053985596, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.38737478852272034, + "rewards/VideoAccuracy/std": 0.48056450486183167, + "step": 683, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 768.0, + "completions/mean_length": 516.0238037109375, + "completions/min_length": 351.0, + "entropy/max": 0.47265625, + "entropy/mean": 0.3125, + "entropy/min": 0.1455078125, + "epoch": 0.684, + "grad_norm": 1.1759029669622152, + "kl": 0.263671875, + "learning_rate": 4.620314718223876e-07, + "loss": 0.0026608225889503956, + "memory(GiB)": 147.2, + "reward": 2.253122568130493, + "reward_std": 0.3057974874973297, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6317847371101379, + "rewards/EvidenceHallucination/std": 0.4286155700683594, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.2087563276290894, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9600986242294312, + "rewards/VideoAccuracy/std": 0.4437883496284485, + "step": 684, + "train_speed(iter/s)": 0.017456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 799.0, + "completions/mean_length": 505.5952453613281, + "completions/min_length": 318.0, + "entropy/max": 1.25, + "entropy/mean": 0.46484375, + "entropy/min": 0.1259765625, + "epoch": 0.685, + "grad_norm": 1.2982480360663333, + "kl": 0.259765625, + "learning_rate": 4.5935918254440274e-07, + "loss": 0.0026418499182909727, + "memory(GiB)": 147.2, + "reward": 2.0587644577026367, + "reward_std": 0.16774247586727142, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7660202980041504, + "rewards/EvidenceHallucination/std": 0.3533811569213867, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.8377888202667236, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8388935327529907, + "rewards/VideoAccuracy/std": 0.3307531774044037, + "step": 685, + "train_speed(iter/s)": 0.017459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 928.0, + "completions/mean_length": 483.6428527832031, + "completions/min_length": 312.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.44921875, + "entropy/min": 0.26953125, + "epoch": 0.686, + "grad_norm": 1.086242695905268, + "kl": 0.259765625, + "learning_rate": 4.566923375134472e-07, + "loss": 0.002612657379359007, + "memory(GiB)": 147.2, + "reward": 1.5800120830535889, + "reward_std": 0.1780678629875183, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4063895642757416, + "rewards/EvidenceHallucination/std": 0.434430867433548, + "rewards/Evidence_Num_Record/mean": 4.642857074737549, + "rewards/Evidence_Num_Record/std": 1.5899291038513184, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.43682926893234253, + "rewards/VideoAccuracy/std": 0.42221078276634216, + "step": 686, + "train_speed(iter/s)": 0.017457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 705.0, + "completions/mean_length": 464.23809814453125, + "completions/min_length": 343.0, + "entropy/max": 0.515625, + "entropy/mean": 0.41796875, + "entropy/min": 0.287109375, + "epoch": 0.687, + "grad_norm": 1.1617117839348086, + "kl": 0.287109375, + "learning_rate": 4.540309635846209e-07, + "loss": 0.002888549119234085, + "memory(GiB)": 147.2, + "reward": 1.6990382671356201, + "reward_std": 0.2361672967672348, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39703914523124695, + "rewards/EvidenceHallucination/std": 0.45230522751808167, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.1506701707839966, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.5196302533149719, + "rewards/VideoAccuracy/std": 0.5318339467048645, + "step": 687, + "train_speed(iter/s)": 0.01746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 807.0, + "completions/mean_length": 576.1428833007812, + "completions/min_length": 339.0, + "entropy/max": 1.8203125, + "entropy/mean": 0.35546875, + "entropy/min": 0.130859375, + "epoch": 0.688, + "grad_norm": 1.0048234365439954, + "kl": 0.2119140625, + "learning_rate": 4.513750875579303e-07, + "loss": 0.0021487295161932707, + "memory(GiB)": 147.2, + "reward": 2.0577375888824463, + "reward_std": 0.2570165693759918, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6128485202789307, + "rewards/EvidenceHallucination/std": 0.4469565153121948, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 0.9912508726119995, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8018344640731812, + "rewards/VideoAccuracy/std": 0.4513843059539795, + "step": 688, + "train_speed(iter/s)": 0.017459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1711.0, + "completions/mean_length": 561.547607421875, + "completions/min_length": 364.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.39453125, + "entropy/min": 0.154296875, + "epoch": 0.689, + "grad_norm": 1.1651389191194548, + "kl": 0.24609375, + "learning_rate": 4.487247361780169e-07, + "loss": 0.0025680987164378166, + "memory(GiB)": 147.2, + "reward": 1.6228870153427124, + "reward_std": 0.1611291617155075, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40475350618362427, + "rewards/EvidenceHallucination/std": 0.4765009880065918, + "rewards/Evidence_Num_Record/mean": 6.642857074737549, + "rewards/Evidence_Num_Record/std": 4.705064296722412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.48955535888671875, + "rewards/VideoAccuracy/std": 0.4310154318809509, + "step": 689, + "train_speed(iter/s)": 0.01745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 710.0, + "completions/mean_length": 460.7857360839844, + "completions/min_length": 343.0, + "entropy/max": 0.7734375, + "entropy/mean": 0.44921875, + "entropy/min": 0.322265625, + "epoch": 0.69, + "grad_norm": 1.2929031857591144, + "kl": 0.29296875, + "learning_rate": 4.460799361338897e-07, + "loss": 0.0029428384732455015, + "memory(GiB)": 147.2, + "reward": 1.5768420696258545, + "reward_std": 0.29188913106918335, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31747567653656006, + "rewards/EvidenceHallucination/std": 0.4247772693634033, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.5294249057769775, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.48001348972320557, + "rewards/VideoAccuracy/std": 0.6256462335586548, + "step": 690, + "train_speed(iter/s)": 0.017453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 886.0, + "completions/mean_length": 520.1666870117188, + "completions/min_length": 350.0, + "entropy/max": 0.51953125, + "entropy/mean": 0.294921875, + "entropy/min": 0.115234375, + "epoch": 0.691, + "grad_norm": 1.0833730957131797, + "kl": 0.23828125, + "learning_rate": 4.4344071405865645e-07, + "loss": 0.002413667505607009, + "memory(GiB)": 147.2, + "reward": 1.9608988761901855, + "reward_std": 0.2161414921283722, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5640000700950623, + "rewards/EvidenceHallucination/std": 0.47513172030448914, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 0.9320714473724365, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.6576226949691772, + "rewards/VideoAccuracy/std": 0.36465051770210266, + "step": 691, + "train_speed(iter/s)": 0.017455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 729.0, + "completions/mean_length": 506.2857360839844, + "completions/min_length": 295.0, + "entropy/max": 2.03125, + "entropy/mean": 0.60546875, + "entropy/min": 0.30078125, + "epoch": 0.692, + "grad_norm": 1.0704807173778, + "kl": 0.275390625, + "learning_rate": 4.408070965292533e-07, + "loss": 0.0028132570441812277, + "memory(GiB)": 147.2, + "reward": 1.4591788053512573, + "reward_std": 0.24840104579925537, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2568591833114624, + "rewards/EvidenceHallucination/std": 0.37475547194480896, + "rewards/Evidence_Num_Record/mean": 5.404761791229248, + "rewards/Evidence_Num_Record/std": 1.4492979049682617, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3744736611843109, + "rewards/VideoAccuracy/std": 0.45563003420829773, + "step": 692, + "train_speed(iter/s)": 0.017459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 665.0, + "completions/mean_length": 427.3333435058594, + "completions/min_length": 279.0, + "entropy/max": 0.65625, + "entropy/mean": 0.4453125, + "entropy/min": 0.3125, + "epoch": 0.693, + "grad_norm": 1.245436341733027, + "kl": 0.275390625, + "learning_rate": 4.381791100661798e-07, + "loss": 0.002816341584548354, + "memory(GiB)": 147.2, + "reward": 1.4305661916732788, + "reward_std": 0.19407030940055847, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29068663716316223, + "rewards/EvidenceHallucination/std": 0.423583060503006, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 1.1466256380081177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3390955328941345, + "rewards/VideoAccuracy/std": 0.46505600214004517, + "step": 693, + "train_speed(iter/s)": 0.017456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 672.0, + "completions/mean_length": 491.71429443359375, + "completions/min_length": 352.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.353515625, + "entropy/min": 0.15234375, + "epoch": 0.694, + "grad_norm": 1.3098769771139183, + "kl": 0.267578125, + "learning_rate": 4.35556781133231e-07, + "loss": 0.0026831082068383694, + "memory(GiB)": 147.2, + "reward": 2.2679808139801025, + "reward_std": 0.11200588941574097, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7510750889778137, + "rewards/EvidenceHallucination/std": 0.3746412992477417, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.7963330745697021, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9558608531951904, + "rewards/VideoAccuracy/std": 0.2994694709777832, + "step": 694, + "train_speed(iter/s)": 0.017458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 775.0, + "completions/mean_length": 565.2619018554688, + "completions/min_length": 384.0, + "entropy/max": 0.72265625, + "entropy/mean": 0.39453125, + "entropy/min": 0.15625, + "epoch": 0.695, + "grad_norm": 1.071990321028825, + "kl": 0.2294921875, + "learning_rate": 4.3294013613722937e-07, + "loss": 0.002319510094821453, + "memory(GiB)": 147.2, + "reward": 1.5056880712509155, + "reward_std": 0.33209341764450073, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28727561235427856, + "rewards/EvidenceHallucination/std": 0.42239242792129517, + "rewards/Evidence_Num_Record/mean": 5.476190567016602, + "rewards/Evidence_Num_Record/std": 2.015042304992676, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.38156628608703613, + "rewards/VideoAccuracy/std": 0.4642001688480377, + "step": 695, + "train_speed(iter/s)": 0.017465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2371.0, + "completions/mean_length": 559.7857055664062, + "completions/min_length": 301.0, + "entropy/max": 1.0078125, + "entropy/mean": 0.4453125, + "entropy/min": 0.1533203125, + "epoch": 0.696, + "grad_norm": 1.2218640902082203, + "kl": 0.2421875, + "learning_rate": 4.303292014277612e-07, + "loss": 0.0025270027108490467, + "memory(GiB)": 147.2, + "reward": 1.5773190259933472, + "reward_std": 0.14106562733650208, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3630666732788086, + "rewards/EvidenceHallucination/std": 0.44932857155799866, + "rewards/Evidence_Num_Record/mean": 5.61904764175415, + "rewards/Evidence_Num_Record/std": 5.103572845458984, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.45232465863227844, + "rewards/VideoAccuracy/std": 0.4420204162597656, + "step": 696, + "train_speed(iter/s)": 0.017443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 655.0, + "completions/mean_length": 438.4285888671875, + "completions/min_length": 292.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.40625, + "entropy/min": 0.29296875, + "epoch": 0.697, + "grad_norm": 1.1802489020241533, + "kl": 0.298828125, + "learning_rate": 4.277240032969105e-07, + "loss": 0.0030094156973063946, + "memory(GiB)": 147.2, + "reward": 1.8751962184906006, + "reward_std": 0.1341707706451416, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5513318777084351, + "rewards/EvidenceHallucination/std": 0.4631679058074951, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 0.8942323923110962, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6649297475814819, + "rewards/VideoAccuracy/std": 0.5983375906944275, + "step": 697, + "train_speed(iter/s)": 0.017447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 770.0, + "completions/mean_length": 552.7857055664062, + "completions/min_length": 356.0, + "entropy/max": 1.109375, + "entropy/mean": 0.36328125, + "entropy/min": 0.15234375, + "epoch": 0.698, + "grad_norm": 0.8709243260210473, + "kl": 0.2255859375, + "learning_rate": 4.251245679789928e-07, + "loss": 0.0022979374043643475, + "memory(GiB)": 147.2, + "reward": 2.063358783721924, + "reward_std": 0.1354062259197235, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5890229940414429, + "rewards/EvidenceHallucination/std": 0.4311382472515106, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.783913254737854, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.8217445611953735, + "rewards/VideoAccuracy/std": 0.4496909976005554, + "step": 698, + "train_speed(iter/s)": 0.017446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 805.0, + "completions/mean_length": 518.8809814453125, + "completions/min_length": 334.0, + "entropy/max": 1.7265625, + "entropy/mean": 0.484375, + "entropy/min": 0.228515625, + "epoch": 0.699, + "grad_norm": 1.1510972726493807, + "kl": 0.26171875, + "learning_rate": 4.2253092165029326e-07, + "loss": 0.0026733647100627422, + "memory(GiB)": 147.2, + "reward": 1.6124541759490967, + "reward_std": 0.28479164838790894, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41173335909843445, + "rewards/EvidenceHallucination/std": 0.44702908396720886, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 1.4650397300720215, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.4777264893054962, + "rewards/VideoAccuracy/std": 0.4453105330467224, + "step": 699, + "train_speed(iter/s)": 0.017444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2616.0, + "completions/mean_length": 513.9761962890625, + "completions/min_length": 291.0, + "entropy/max": 0.75, + "entropy/mean": 0.4296875, + "entropy/min": 0.1279296875, + "epoch": 0.7, + "grad_norm": 1.2691967916036708, + "kl": 0.251953125, + "learning_rate": 4.1994309042880193e-07, + "loss": 0.002594948513433337, + "memory(GiB)": 147.2, + "reward": 1.737424612045288, + "reward_std": 0.26810336112976074, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45363911986351013, + "rewards/EvidenceHallucination/std": 0.4442523717880249, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 3.351287364959717, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6133636236190796, + "rewards/VideoAccuracy/std": 0.5350895524024963, + "step": 700, + "train_speed(iter/s)": 0.017432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1964.0, + "completions/mean_length": 573.952392578125, + "completions/min_length": 329.0, + "entropy/max": 1.015625, + "entropy/mean": 0.447265625, + "entropy/min": 0.1416015625, + "epoch": 0.701, + "grad_norm": 1.0853250853365422, + "kl": 0.2578125, + "learning_rate": 4.173611003739498e-07, + "loss": 0.002697226358577609, + "memory(GiB)": 136.59, + "reward": 1.6427127122879028, + "reward_std": 0.10287611186504364, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4754685163497925, + "rewards/EvidenceHallucination/std": 0.4412776529788971, + "rewards/Evidence_Num_Record/mean": 6.309524059295654, + "rewards/Evidence_Num_Record/std": 4.425469875335693, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5476190447807312, + "rewards/VideoAccuracy/std": 0.5037605166435242, + "step": 701, + "train_speed(iter/s)": 3.892747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1631.0, + "completions/mean_length": 590.0714111328125, + "completions/min_length": 301.0, + "entropy/max": 1.6875, + "entropy/mean": 0.5, + "entropy/min": 0.1875, + "epoch": 0.702, + "grad_norm": 1.0887211150611817, + "kl": 0.2470703125, + "learning_rate": 4.1478497748634876e-07, + "loss": 0.0025451509281992912, + "memory(GiB)": 136.59, + "reward": 1.436562418937683, + "reward_std": 0.3272863030433655, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31658434867858887, + "rewards/EvidenceHallucination/std": 0.41431131958961487, + "rewards/Evidence_Num_Record/mean": 6.1666669845581055, + "rewards/Evidence_Num_Record/std": 1.898865818977356, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.33991220593452454, + "rewards/VideoAccuracy/std": 0.44950252771377563, + "step": 702, + "train_speed(iter/s)": 2.872482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1660.0, + "completions/mean_length": 565.952392578125, + "completions/min_length": 332.0, + "entropy/max": 0.7109375, + "entropy/mean": 0.470703125, + "entropy/min": 0.279296875, + "epoch": 0.703, + "grad_norm": 1.1977387701297668, + "kl": 0.2470703125, + "learning_rate": 4.1221474770752696e-07, + "loss": 0.0025409169029444456, + "memory(GiB)": 136.59, + "reward": 1.3434559106826782, + "reward_std": 0.3675708770751953, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22899995744228363, + "rewards/EvidenceHallucination/std": 0.3948313295841217, + "rewards/Evidence_Num_Record/mean": 5.952381134033203, + "rewards/Evidence_Num_Record/std": 4.444521903991699, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.26432254910469055, + "rewards/VideoAccuracy/std": 0.36528319120407104, + "step": 703, + "train_speed(iter/s)": 2.33247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 890.0, + "completions/mean_length": 521.6190795898438, + "completions/min_length": 371.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.34765625, + "entropy/min": 0.134765625, + "epoch": 0.704, + "grad_norm": 1.1878908628763787, + "kl": 0.25390625, + "learning_rate": 4.096504369196704e-07, + "loss": 0.0025660318788141012, + "memory(GiB)": 136.59, + "reward": 2.0264220237731934, + "reward_std": 0.2580689787864685, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4410540759563446, + "rewards/EvidenceHallucination/std": 0.4718802869319916, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.1024731397628784, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.761904776096344, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.7858304381370544, + "rewards/VideoAccuracy/std": 0.5604264736175537, + "step": 704, + "train_speed(iter/s)": 2.056455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 828.0, + "completions/mean_length": 546.7619018554688, + "completions/min_length": 322.0, + "entropy/max": 1.1171875, + "entropy/mean": 0.40234375, + "entropy/min": 0.1689453125, + "epoch": 0.705, + "grad_norm": 0.8909335778267706, + "kl": 0.2421875, + "learning_rate": 4.070920709453597e-07, + "loss": 0.0024710441939532757, + "memory(GiB)": 137.01, + "reward": 1.8602908849716187, + "reward_std": 0.026720553636550903, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5794177055358887, + "rewards/EvidenceHallucination/std": 0.4214595556259155, + "rewards/Evidence_Num_Record/mean": 5.261904716491699, + "rewards/Evidence_Num_Record/std": 2.0607781410217285, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6777406334877014, + "rewards/VideoAccuracy/std": 0.4865841567516327, + "step": 705, + "train_speed(iter/s)": 1.808727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 957.0, + "completions/mean_length": 537.6904907226562, + "completions/min_length": 314.0, + "entropy/max": 0.7734375, + "entropy/mean": 0.416015625, + "entropy/min": 0.2578125, + "epoch": 0.706, + "grad_norm": 1.1417716671268525, + "kl": 0.24609375, + "learning_rate": 4.0453967554731207e-07, + "loss": 0.0025275161024183035, + "memory(GiB)": 137.02, + "reward": 1.4983958005905151, + "reward_std": 0.1210184395313263, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.31716296076774597, + "rewards/EvidenceHallucination/std": 0.437943696975708, + "rewards/Evidence_Num_Record/mean": 5.547619342803955, + "rewards/Evidence_Num_Record/std": 2.3808419704437256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.36829641461372375, + "rewards/VideoAccuracy/std": 0.40108880400657654, + "step": 706, + "train_speed(iter/s)": 1.640966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 600.0, + "completions/mean_length": 435.9761962890625, + "completions/min_length": 309.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.4453125, + "entropy/min": 0.251953125, + "epoch": 0.707, + "grad_norm": 1.1649223404096742, + "kl": 0.28515625, + "learning_rate": 4.019932764281211e-07, + "loss": 0.0028660595417022705, + "memory(GiB)": 137.02, + "reward": 1.719510555267334, + "reward_std": 0.3151416778564453, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42755359411239624, + "rewards/EvidenceHallucination/std": 0.4588277339935303, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.0339757204055786, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.5435237288475037, + "rewards/VideoAccuracy/std": 0.5430207848548889, + "step": 707, + "train_speed(iter/s)": 1.511853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2298.0, + "completions/mean_length": 568.2619018554688, + "completions/min_length": 344.0, + "entropy/max": 0.5625, + "entropy/mean": 0.25390625, + "entropy/min": 0.11474609375, + "epoch": 0.708, + "grad_norm": 0.9712050959951763, + "kl": 0.2119140625, + "learning_rate": 3.9945289922999705e-07, + "loss": 0.0022306388709694147, + "memory(GiB)": 137.02, + "reward": 2.0796968936920166, + "reward_std": 0.157607302069664, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.17472554743289948, + "rewards/EvidenceHallucination/std": 0.29664650559425354, + "rewards/Evidence_Num_Record/mean": 4.928571701049805, + "rewards/Evidence_Num_Record/std": 5.274849891662598, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.9161801934242249, + "rewards/VideoAccuracy/std": 0.3562900424003601, + "step": 708, + "train_speed(iter/s)": 1.233763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1125.0, + "completions/mean_length": 519.2380981445312, + "completions/min_length": 332.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.431640625, + "entropy/min": 0.255859375, + "epoch": 0.709, + "grad_norm": 1.118068943990787, + "kl": 0.267578125, + "learning_rate": 3.9691856953451043e-07, + "loss": 0.0027058953419327736, + "memory(GiB)": 137.02, + "reward": 1.5037561655044556, + "reward_std": 0.2950292229652405, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28459712862968445, + "rewards/EvidenceHallucination/std": 0.4152517020702362, + "rewards/Evidence_Num_Record/mean": 5.261904716491699, + "rewards/Evidence_Num_Record/std": 2.8632194995880127, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.3849318325519562, + "rewards/VideoAccuracy/std": 0.4115446209907532, + "step": 709, + "train_speed(iter/s)": 1.151477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1453.0, + "completions/mean_length": 521.9285888671875, + "completions/min_length": 344.0, + "entropy/max": 0.75390625, + "entropy/mean": 0.44921875, + "entropy/min": 0.26171875, + "epoch": 0.71, + "grad_norm": 1.1740647837126024, + "kl": 0.251953125, + "learning_rate": 3.943903128623335e-07, + "loss": 0.002540436340495944, + "memory(GiB)": 137.02, + "reward": 1.4951140880584717, + "reward_std": 0.16538338363170624, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2859345078468323, + "rewards/EvidenceHallucination/std": 0.3980078399181366, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 0.9678334593772888, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.404593825340271, + "rewards/VideoAccuracy/std": 0.5376194715499878, + "step": 710, + "train_speed(iter/s)": 1.062856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 924.0, + "completions/mean_length": 514.7380981445312, + "completions/min_length": 364.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.32421875, + "entropy/min": 0.1005859375, + "epoch": 0.711, + "grad_norm": 1.076386212525606, + "kl": 0.2294921875, + "learning_rate": 3.918681546729822e-07, + "loss": 0.0023401621729135513, + "memory(GiB)": 137.02, + "reward": 1.8365966081619263, + "reward_std": 0.07770262658596039, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32080966234207153, + "rewards/EvidenceHallucination/std": 0.4606294333934784, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.6176836490631104, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5724345445632935, + "rewards/VideoAccuracy/std": 0.4593268632888794, + "step": 711, + "train_speed(iter/s)": 1.009283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/mean_length": 572.8095092773438, + "completions/min_length": 286.0, + "entropy/max": 1.25, + "entropy/mean": 0.49609375, + "entropy/min": 0.28125, + "epoch": 0.712, + "grad_norm": 1.4269730629808097, + "kl": 0.259765625, + "learning_rate": 3.8935212036456175e-07, + "loss": 0.0026799244806170464, + "memory(GiB)": 137.02, + "reward": 1.976933479309082, + "reward_std": 0.23799024522304535, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6350836753845215, + "rewards/EvidenceHallucination/std": 0.3273024559020996, + "rewards/Evidence_Num_Record/mean": 6.452381134033203, + "rewards/Evidence_Num_Record/std": 4.712217330932617, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8165834546089172, + "rewards/VideoAccuracy/std": 0.3730264902114868, + "step": 712, + "train_speed(iter/s)": 0.941002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 740.0, + "completions/mean_length": 488.8095397949219, + "completions/min_length": 345.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.455078125, + "entropy/min": 0.25, + "epoch": 0.713, + "grad_norm": 1.3572604623602544, + "kl": 0.271484375, + "learning_rate": 3.868422352735102e-07, + "loss": 0.0027485534083098173, + "memory(GiB)": 137.02, + "reward": 1.7807960510253906, + "reward_std": 0.3023853003978729, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5612412691116333, + "rewards/EvidenceHallucination/std": 0.43778860569000244, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 1.4362164735794067, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.639976441860199, + "rewards/VideoAccuracy/std": 0.4260217845439911, + "step": 713, + "train_speed(iter/s)": 0.877992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 961.0, + "completions/mean_length": 534.7857055664062, + "completions/min_length": 360.0, + "entropy/max": 0.53125, + "entropy/mean": 0.326171875, + "entropy/min": 0.123046875, + "epoch": 0.714, + "grad_norm": 1.1692359671507124, + "kl": 0.2490234375, + "learning_rate": 3.843385246743417e-07, + "loss": 0.0025063527282327414, + "memory(GiB)": 137.02, + "reward": 2.2322938442230225, + "reward_std": 0.14283570647239685, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.658531904220581, + "rewards/EvidenceHallucination/std": 0.4137778580188751, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.7392277121543884, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9339207410812378, + "rewards/VideoAccuracy/std": 0.4797661602497101, + "step": 714, + "train_speed(iter/s)": 0.832326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1641.0, + "completions/mean_length": 593.452392578125, + "completions/min_length": 354.0, + "entropy/max": 3.0625, + "entropy/mean": 0.48828125, + "entropy/min": 0.14453125, + "epoch": 0.715, + "grad_norm": 1.181223745862962, + "kl": 0.2255859375, + "learning_rate": 3.818410137793947e-07, + "loss": 0.002410789020359516, + "memory(GiB)": 137.02, + "reward": 1.8372621536254883, + "reward_std": 0.2797810137271881, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.498088538646698, + "rewards/EvidenceHallucination/std": 0.36788809299468994, + "rewards/Evidence_Num_Record/mean": 5.904761791229248, + "rewards/Evidence_Num_Record/std": 4.360630512237549, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6709778308868408, + "rewards/VideoAccuracy/std": 0.4687010943889618, + "step": 715, + "train_speed(iter/s)": 0.781068 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1877.0, + "completions/mean_length": 493.7857360839844, + "completions/min_length": 257.0, + "entropy/max": 0.83203125, + "entropy/mean": 0.41796875, + "entropy/min": 0.1513671875, + "epoch": 0.716, + "grad_norm": 0.9383627435068316, + "kl": 0.265625, + "learning_rate": 3.7934972773857634e-07, + "loss": 0.0028080097399652004, + "memory(GiB)": 137.02, + "reward": 1.4566277265548706, + "reward_std": 0.08022846281528473, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27737775444984436, + "rewards/EvidenceHallucination/std": 0.4242125153541565, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.9284530878067017, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.3392474055290222, + "rewards/VideoAccuracy/std": 0.3877919316291809, + "step": 716, + "train_speed(iter/s)": 0.730442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 616.0, + "completions/mean_length": 463.1428527832031, + "completions/min_length": 319.0, + "entropy/max": 0.84375, + "entropy/mean": 0.427734375, + "entropy/min": 0.287109375, + "epoch": 0.717, + "grad_norm": 1.183539422038026, + "kl": 0.265625, + "learning_rate": 3.7686469163910883e-07, + "loss": 0.0026837345212697983, + "memory(GiB)": 137.02, + "reward": 2.0717267990112305, + "reward_std": 0.3083031177520752, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5801745653152466, + "rewards/EvidenceHallucination/std": 0.44655001163482666, + "rewards/Evidence_Num_Record/mean": 4.547619342803955, + "rewards/Evidence_Num_Record/std": 1.1305595636367798, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.85569167137146, + "rewards/VideoAccuracy/std": 0.6598480343818665, + "step": 717, + "train_speed(iter/s)": 0.704933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1163.0, + "completions/mean_length": 560.0238037109375, + "completions/min_length": 268.0, + "entropy/max": 1.9453125, + "entropy/mean": 0.37890625, + "entropy/min": 0.10400390625, + "epoch": 0.718, + "grad_norm": 1.174024215201866, + "kl": 0.2255859375, + "learning_rate": 3.7438593050527846e-07, + "loss": 0.0023144427686929703, + "memory(GiB)": 137.02, + "reward": 1.712986946105957, + "reward_std": 0.1705286204814911, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4080731272697449, + "rewards/EvidenceHallucination/std": 0.45950257778167725, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 1.361491322517395, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5476190447807312, + "rewards/HonestTime/std": 0.5037605166435242, + "rewards/VideoAccuracy/mean": 0.5218484997749329, + "rewards/VideoAccuracy/std": 0.3351951837539673, + "step": 718, + "train_speed(iter/s)": 0.663054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2105.0, + "completions/mean_length": 543.1428833007812, + "completions/min_length": 265.0, + "entropy/max": 1.0234375, + "entropy/mean": 0.466796875, + "entropy/min": 0.1669921875, + "epoch": 0.719, + "grad_norm": 1.0306257671320522, + "kl": 0.2431640625, + "learning_rate": 3.719134692981826e-07, + "loss": 0.0025380700826644897, + "memory(GiB)": 137.02, + "reward": 1.3437881469726562, + "reward_std": 0.03641896694898605, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20182187855243683, + "rewards/EvidenceHallucination/std": 0.392501562833786, + "rewards/Evidence_Num_Record/mean": 5.333333492279053, + "rewards/Evidence_Num_Record/std": 3.7329559326171875, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.24151895940303802, + "rewards/VideoAccuracy/std": 0.37862592935562134, + "step": 719, + "train_speed(iter/s)": 0.627096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 641.0, + "completions/mean_length": 448.90478515625, + "completions/min_length": 358.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.447265625, + "entropy/min": 0.33203125, + "epoch": 0.72, + "grad_norm": 1.224799165841684, + "kl": 0.267578125, + "learning_rate": 3.694473329154778e-07, + "loss": 0.002683891449123621, + "memory(GiB)": 137.02, + "reward": 1.6364336013793945, + "reward_std": 0.3253479599952698, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35876768827438354, + "rewards/EvidenceHallucination/std": 0.4279513955116272, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.9093654751777649, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5361085534095764, + "rewards/VideoAccuracy/std": 0.6426951289176941, + "step": 720, + "train_speed(iter/s)": 0.605893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 950.0, + "completions/mean_length": 570.5714111328125, + "completions/min_length": 408.0, + "entropy/max": 0.486328125, + "entropy/mean": 0.267578125, + "entropy/min": 0.1181640625, + "epoch": 0.721, + "grad_norm": 0.9768702301605966, + "kl": 0.2080078125, + "learning_rate": 3.6698754619112973e-07, + "loss": 0.0021096975542604923, + "memory(GiB)": 137.02, + "reward": 2.3190793991088867, + "reward_std": 0.10299627482891083, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3182400166988373, + "rewards/EvidenceHallucination/std": 0.4300622045993805, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.0473682880401611, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0554314851760864, + "rewards/VideoAccuracy/std": 0.16592219471931458, + "step": 721, + "train_speed(iter/s)": 0.56258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/mean_length": 621.4761962890625, + "completions/min_length": 382.0, + "entropy/max": 0.765625, + "entropy/mean": 0.4453125, + "entropy/min": 0.228515625, + "epoch": 0.722, + "grad_norm": 1.445071191813976, + "kl": 0.23046875, + "learning_rate": 3.6453413389516385e-07, + "loss": 0.0023926938883960247, + "memory(GiB)": 137.02, + "reward": 2.033181667327881, + "reward_std": 0.12473130226135254, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7227866649627686, + "rewards/EvidenceHallucination/std": 0.2981109917163849, + "rewards/Evidence_Num_Record/mean": 7.0714287757873535, + "rewards/Evidence_Num_Record/std": 3.9471688270568848, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8552910089492798, + "rewards/VideoAccuracy/std": 0.32602179050445557, + "step": 722, + "train_speed(iter/s)": 0.544616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/mean_length": 492.19049072265625, + "completions/min_length": 317.0, + "entropy/max": 0.59375, + "entropy/mean": 0.447265625, + "entropy/min": 0.275390625, + "epoch": 0.723, + "grad_norm": 1.2170647391313107, + "kl": 0.265625, + "learning_rate": 3.62087120733415e-07, + "loss": 0.0026650577783584595, + "memory(GiB)": 137.02, + "reward": 1.5765604972839355, + "reward_std": 0.25286492705345154, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39277371764183044, + "rewards/EvidenceHallucination/std": 0.4677566885948181, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.3657498359680176, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.4075295329093933, + "rewards/VideoAccuracy/std": 0.3982459306716919, + "step": 723, + "train_speed(iter/s)": 0.529228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/mean_length": 539.1190795898438, + "completions/min_length": 355.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.349609375, + "entropy/min": 0.095703125, + "epoch": 0.724, + "grad_norm": 1.1063410984775115, + "kl": 0.24609375, + "learning_rate": 3.596465313472777e-07, + "loss": 0.002490151673555374, + "memory(GiB)": 137.02, + "reward": 1.788365364074707, + "reward_std": 0.16274519264698029, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4031307101249695, + "rewards/EvidenceHallucination/std": 0.4470342695713043, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.0779707431793213, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.738095223903656, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.5601202249526978, + "rewards/VideoAccuracy/std": 0.5124112367630005, + "step": 724, + "train_speed(iter/s)": 0.508674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2080.0, + "completions/mean_length": 616.5952758789062, + "completions/min_length": 336.0, + "entropy/max": 1.109375, + "entropy/mean": 0.353515625, + "entropy/min": 0.166015625, + "epoch": 0.725, + "grad_norm": 1.208381710184627, + "kl": 0.22265625, + "learning_rate": 3.5721239031346063e-07, + "loss": 0.0023550393525511026, + "memory(GiB)": 137.02, + "reward": 2.0842397212982178, + "reward_std": 0.2013828456401825, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6131070256233215, + "rewards/EvidenceHallucination/std": 0.3226134181022644, + "rewards/Evidence_Num_Record/mean": 6.404761791229248, + "rewards/Evidence_Num_Record/std": 6.0164794921875, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8949514031410217, + "rewards/VideoAccuracy/std": 0.29472899436950684, + "step": 725, + "train_speed(iter/s)": 0.480815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 719.0, + "completions/mean_length": 507.1190490722656, + "completions/min_length": 332.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.4375, + "entropy/min": 0.2451171875, + "epoch": 0.726, + "grad_norm": 1.3085967014885909, + "kl": 0.259765625, + "learning_rate": 3.5478472214373713e-07, + "loss": 0.0026305762585252523, + "memory(GiB)": 137.02, + "reward": 1.688267469406128, + "reward_std": 0.15720278024673462, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43249648809432983, + "rewards/EvidenceHallucination/std": 0.44495826959609985, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 1.45649254322052, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.5160539150238037, + "rewards/VideoAccuracy/std": 0.3884340226650238, + "step": 726, + "train_speed(iter/s)": 0.468899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 435.26190185546875, + "completions/min_length": 201.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.466796875, + "entropy/min": 0.26953125, + "epoch": 0.727, + "grad_norm": 1.4513180977801843, + "kl": 0.291015625, + "learning_rate": 3.523635512846981e-07, + "loss": 0.002929308917373419, + "memory(GiB)": 137.02, + "reward": 1.5991050004959106, + "reward_std": 0.29706868529319763, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2844417691230774, + "rewards/EvidenceHallucination/std": 0.4057222902774811, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.3216679096221924, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.44221657514572144, + "rewards/VideoAccuracy/std": 0.4839160740375519, + "step": 727, + "train_speed(iter/s)": 0.457833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1287.0, + "completions/mean_length": 604.9285888671875, + "completions/min_length": 365.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.296875, + "entropy/min": 0.12158203125, + "epoch": 0.728, + "grad_norm": 0.8739823875491329, + "kl": 0.20703125, + "learning_rate": 3.4994890211750747e-07, + "loss": 0.002132317516952753, + "memory(GiB)": 137.02, + "reward": 1.9869023561477661, + "reward_std": 0.14404073357582092, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5619248151779175, + "rewards/EvidenceHallucination/std": 0.43674349784851074, + "rewards/Evidence_Num_Record/mean": 5.095238208770752, + "rewards/Evidence_Num_Record/std": 3.962073564529419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.7459458708763123, + "rewards/VideoAccuracy/std": 0.4712871015071869, + "step": 728, + "train_speed(iter/s)": 0.440075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1148.0, + "completions/mean_length": 535.5952758789062, + "completions/min_length": 386.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.4609375, + "entropy/min": 0.2578125, + "epoch": 0.729, + "grad_norm": 1.282961691054857, + "kl": 0.251953125, + "learning_rate": 3.4754079895765596e-07, + "loss": 0.002549453405663371, + "memory(GiB)": 137.02, + "reward": 1.6989330053329468, + "reward_std": 0.23000681400299072, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45383572578430176, + "rewards/EvidenceHallucination/std": 0.44538405537605286, + "rewards/Evidence_Num_Record/mean": 5.547619342803955, + "rewards/Evidence_Num_Record/std": 2.318561315536499, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.5510229468345642, + "rewards/VideoAccuracy/std": 0.4384976029396057, + "step": 729, + "train_speed(iter/s)": 0.43011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 636.0, + "completions/mean_length": 472.0476379394531, + "completions/min_length": 333.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.46875, + "entropy/min": 0.33984375, + "epoch": 0.73, + "grad_norm": 1.5204018132602868, + "kl": 0.279296875, + "learning_rate": 3.45139266054715e-07, + "loss": 0.0028142035007476807, + "memory(GiB)": 137.02, + "reward": 1.6489835977554321, + "reward_std": 0.31000250577926636, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3234373927116394, + "rewards/EvidenceHallucination/std": 0.4225875735282898, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 0.813646674156189, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5176292061805725, + "rewards/VideoAccuracy/std": 0.5845872163772583, + "step": 730, + "train_speed(iter/s)": 0.420384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 932.0, + "completions/mean_length": 509.5952453613281, + "completions/min_length": 318.0, + "entropy/max": 0.91796875, + "entropy/mean": 0.330078125, + "entropy/min": 0.1328125, + "epoch": 0.731, + "grad_norm": 1.0430166734748847, + "kl": 0.24609375, + "learning_rate": 3.427443275920945e-07, + "loss": 0.0025074242148548365, + "memory(GiB)": 137.02, + "reward": 2.30470871925354, + "reward_std": 0.17227831482887268, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6398620009422302, + "rewards/EvidenceHallucination/std": 0.4209192991256714, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9814980030059814, + "rewards/VideoAccuracy/std": 0.3276780843734741, + "step": 731, + "train_speed(iter/s)": 0.407664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1714.0, + "completions/mean_length": 607.1428833007812, + "completions/min_length": 380.0, + "entropy/max": 1.515625, + "entropy/mean": 0.435546875, + "entropy/min": 0.16796875, + "epoch": 0.732, + "grad_norm": 1.107850934047529, + "kl": 0.23046875, + "learning_rate": 3.403560076867985e-07, + "loss": 0.002410092856734991, + "memory(GiB)": 137.02, + "reward": 1.703174114227295, + "reward_std": 0.10910899937152863, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48142144083976746, + "rewards/EvidenceHallucination/std": 0.45236778259277344, + "rewards/Evidence_Num_Record/mean": 6.761904716491699, + "rewards/Evidence_Num_Record/std": 3.8686327934265137, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5783182978630066, + "rewards/VideoAccuracy/std": 0.46562719345092773, + "step": 732, + "train_speed(iter/s)": 0.394389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 754.0, + "completions/mean_length": 475.19049072265625, + "completions/min_length": 308.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.427734375, + "entropy/min": 0.2734375, + "epoch": 0.733, + "grad_norm": 1.1707575932936314, + "kl": 0.271484375, + "learning_rate": 3.3797433038918145e-07, + "loss": 0.0027360159438103437, + "memory(GiB)": 137.02, + "reward": 1.5253936052322388, + "reward_std": 0.3052489757537842, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.30475470423698425, + "rewards/EvidenceHallucination/std": 0.4269247353076935, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.3077540397644043, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.4120616614818573, + "rewards/VideoAccuracy/std": 0.408565491437912, + "step": 733, + "train_speed(iter/s)": 0.378443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 596.0, + "completions/mean_length": 456.71429443359375, + "completions/min_length": 324.0, + "entropy/max": 0.53125, + "entropy/mean": 0.353515625, + "entropy/min": 0.1826171875, + "epoch": 0.734, + "grad_norm": 1.0924188852331589, + "kl": 0.27734375, + "learning_rate": 3.355993196827075e-07, + "loss": 0.002987553831189871, + "memory(GiB)": 137.02, + "reward": 2.08844256401062, + "reward_std": 0.16964636743068695, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6116736531257629, + "rewards/EvidenceHallucination/std": 0.4442037343978882, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 0.9472129940986633, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.8042028546333313, + "rewards/VideoAccuracy/std": 0.51776522397995, + "step": 734, + "train_speed(iter/s)": 0.369455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/mean_length": 596.3333740234375, + "completions/min_length": 334.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.40625, + "entropy/min": 0.16796875, + "epoch": 0.735, + "grad_norm": 1.0015811368115848, + "kl": 0.2353515625, + "learning_rate": 3.332309994837085e-07, + "loss": 0.002433416899293661, + "memory(GiB)": 137.02, + "reward": 1.982049584388733, + "reward_std": 0.2236880511045456, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.604497492313385, + "rewards/EvidenceHallucination/std": 0.39630842208862305, + "rewards/Evidence_Num_Record/mean": 5.738095283508301, + "rewards/Evidence_Num_Record/std": 3.246457815170288, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7944831848144531, + "rewards/VideoAccuracy/std": 0.5202955007553101, + "step": 735, + "train_speed(iter/s)": 0.357714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 876.0, + "completions/mean_length": 517.1904907226562, + "completions/min_length": 318.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.40625, + "entropy/min": 0.2216796875, + "epoch": 0.736, + "grad_norm": 1.324356052193191, + "kl": 0.255859375, + "learning_rate": 3.308693936411421e-07, + "loss": 0.0025905664078891277, + "memory(GiB)": 137.02, + "reward": 1.7714544534683228, + "reward_std": 0.3001267910003662, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5094105005264282, + "rewards/EvidenceHallucination/std": 0.4729577600955963, + "rewards/Evidence_Num_Record/mean": 4.785714149475098, + "rewards/Evidence_Num_Record/std": 1.6897931098937988, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5743343830108643, + "rewards/VideoAccuracy/std": 0.35331177711486816, + "step": 736, + "train_speed(iter/s)": 0.351023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 730.0, + "completions/mean_length": 462.71429443359375, + "completions/min_length": 294.0, + "entropy/max": 0.55859375, + "entropy/mean": 0.40234375, + "entropy/min": 0.25, + "epoch": 0.737, + "grad_norm": 1.2447147031615817, + "kl": 0.287109375, + "learning_rate": 3.2851452593635265e-07, + "loss": 0.0029010369908064604, + "memory(GiB)": 137.02, + "reward": 1.7444754838943481, + "reward_std": 0.20037083327770233, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3784564733505249, + "rewards/EvidenceHallucination/std": 0.4463752210140228, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.0109734535217285, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.5687841176986694, + "rewards/VideoAccuracy/std": 0.6110247373580933, + "step": 737, + "train_speed(iter/s)": 0.344746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 895.0, + "completions/mean_length": 579.8809814453125, + "completions/min_length": 426.0, + "entropy/max": 1.5546875, + "entropy/mean": 0.41015625, + "entropy/min": 0.1630859375, + "epoch": 0.738, + "grad_norm": 0.9076314424297449, + "kl": 0.2080078125, + "learning_rate": 3.2616642008283214e-07, + "loss": 0.0021310984157025814, + "memory(GiB)": 137.02, + "reward": 1.9944345951080322, + "reward_std": 0.19511547684669495, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24974822998046875, + "rewards/EvidenceHallucination/std": 0.35780394077301025, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.9660297632217407, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8111514449119568, + "rewards/VideoAccuracy/std": 0.44406232237815857, + "step": 738, + "train_speed(iter/s)": 0.332188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/mean_length": 547.2619018554688, + "completions/min_length": 340.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.458984375, + "entropy/min": 0.2451171875, + "epoch": 0.739, + "grad_norm": 1.0060410760992153, + "kl": 0.2392578125, + "learning_rate": 3.2382509972598084e-07, + "loss": 0.0024247546680271626, + "memory(GiB)": 137.02, + "reward": 1.5274099111557007, + "reward_std": 0.19877104461193085, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37413010001182556, + "rewards/EvidenceHallucination/std": 0.4627932608127594, + "rewards/Evidence_Num_Record/mean": 5.833333492279053, + "rewards/Evidence_Num_Record/std": 1.7096093893051147, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.39067915081977844, + "rewards/VideoAccuracy/std": 0.41949576139450073, + "step": 739, + "train_speed(iter/s)": 0.327163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 800.0, + "completions/mean_length": 481.26190185546875, + "completions/min_length": 314.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.419921875, + "entropy/min": 0.2734375, + "epoch": 0.74, + "grad_norm": 1.2859510032541313, + "kl": 0.279296875, + "learning_rate": 3.214905884428679e-07, + "loss": 0.002799172420054674, + "memory(GiB)": 137.02, + "reward": 1.4543876647949219, + "reward_std": 0.15212209522724152, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20015443861484528, + "rewards/EvidenceHallucination/std": 0.3680866062641144, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.6562975645065308, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.35721397399902344, + "rewards/VideoAccuracy/std": 0.4436786472797394, + "step": 740, + "train_speed(iter/s)": 0.322398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/mean_length": 587.452392578125, + "completions/min_length": 350.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.302734375, + "entropy/min": 0.10107421875, + "epoch": 0.741, + "grad_norm": 0.8845740254085731, + "kl": 0.2119140625, + "learning_rate": 3.1916290974199655e-07, + "loss": 0.002165029523894191, + "memory(GiB)": 137.02, + "reward": 2.206284999847412, + "reward_std": 0.12530550360679626, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6892232298851013, + "rewards/EvidenceHallucination/std": 0.380018025636673, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.6251507997512817, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.8779641389846802, + "rewards/VideoAccuracy/std": 0.3144403398036957, + "step": 741, + "train_speed(iter/s)": 0.313079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1150.0, + "completions/mean_length": 541.357177734375, + "completions/min_length": 277.0, + "entropy/max": 1.203125, + "entropy/mean": 0.53515625, + "entropy/min": 0.3125, + "epoch": 0.742, + "grad_norm": 1.1111498340287014, + "kl": 0.26171875, + "learning_rate": 3.168420870630657e-07, + "loss": 0.002663705265149474, + "memory(GiB)": 137.02, + "reward": 1.623490810394287, + "reward_std": 0.1593041568994522, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4474904239177704, + "rewards/EvidenceHallucination/std": 0.44139039516448975, + "rewards/Evidence_Num_Record/mean": 5.857142925262451, + "rewards/Evidence_Num_Record/std": 2.415109157562256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.095238097012043, + "rewards/HonestTime/std": 0.2971017360687256, + "rewards/VideoAccuracy/mean": 0.5149451494216919, + "rewards/VideoAccuracy/std": 0.4730999767780304, + "step": 742, + "train_speed(iter/s)": 0.307681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 676.0, + "completions/mean_length": 465.8809509277344, + "completions/min_length": 292.0, + "entropy/max": 0.625, + "entropy/mean": 0.4375, + "entropy/min": 0.30078125, + "epoch": 0.743, + "grad_norm": 1.224308382413667, + "kl": 0.2734375, + "learning_rate": 3.1452814377673343e-07, + "loss": 0.002747116144746542, + "memory(GiB)": 137.02, + "reward": 1.530137538909912, + "reward_std": 0.22144876420497894, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3962409794330597, + "rewards/EvidenceHallucination/std": 0.46595442295074463, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.2204699516296387, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.3985084295272827, + "rewards/VideoAccuracy/std": 0.41005414724349976, + "step": 743, + "train_speed(iter/s)": 0.302552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 967.0, + "completions/mean_length": 544.452392578125, + "completions/min_length": 392.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.326171875, + "entropy/min": 0.10693359375, + "epoch": 0.744, + "grad_norm": 1.166318698999342, + "kl": 0.2490234375, + "learning_rate": 3.12221103184383e-07, + "loss": 0.0025127048138529062, + "memory(GiB)": 137.02, + "reward": 1.797698974609375, + "reward_std": 0.151752769947052, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3840665817260742, + "rewards/EvidenceHallucination/std": 0.47486865520477295, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.0280616283416748, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.5637427568435669, + "rewards/VideoAccuracy/std": 0.4568615257740021, + "step": 744, + "train_speed(iter/s)": 0.296248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 766.0, + "completions/mean_length": 544.6428833007812, + "completions/min_length": 320.0, + "entropy/max": 1.3203125, + "entropy/mean": 0.455078125, + "entropy/min": 0.1494140625, + "epoch": 0.745, + "grad_norm": 0.9852337804421616, + "kl": 0.2412109375, + "learning_rate": 3.0992098851788817e-07, + "loss": 0.0024463534355163574, + "memory(GiB)": 137.02, + "reward": 1.7108333110809326, + "reward_std": 0.027525782585144043, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41925325989723206, + "rewards/EvidenceHallucination/std": 0.4327711760997772, + "rewards/Evidence_Num_Record/mean": 5.142857074737549, + "rewards/Evidence_Num_Record/std": 1.7884647846221924, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5650777220726013, + "rewards/VideoAccuracy/std": 0.505497932434082, + "step": 745, + "train_speed(iter/s)": 0.290104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 704.0, + "completions/mean_length": 492.26190185546875, + "completions/min_length": 247.0, + "entropy/max": 0.64453125, + "entropy/mean": 0.45703125, + "entropy/min": 0.287109375, + "epoch": 0.746, + "grad_norm": 1.3799214356283465, + "kl": 0.259765625, + "learning_rate": 3.0762782293937727e-07, + "loss": 0.002617140766233206, + "memory(GiB)": 137.02, + "reward": 1.6975003480911255, + "reward_std": 0.14715532958507538, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4007459878921509, + "rewards/EvidenceHallucination/std": 0.45319873094558716, + "rewards/Evidence_Num_Record/mean": 5.095238208770752, + "rewards/Evidence_Num_Record/std": 1.4618651866912842, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.5268748998641968, + "rewards/VideoAccuracy/std": 0.39935460686683655, + "step": 746, + "train_speed(iter/s)": 0.284507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 639.0, + "completions/mean_length": 478.21429443359375, + "completions/min_length": 327.0, + "entropy/max": 0.671875, + "entropy/mean": 0.41796875, + "entropy/min": 0.296875, + "epoch": 0.747, + "grad_norm": 1.0914875373782056, + "kl": 0.2890625, + "learning_rate": 3.0534162954100263e-07, + "loss": 0.0029141679406166077, + "memory(GiB)": 137.02, + "reward": 1.831376552581787, + "reward_std": 0.20554934442043304, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3792650103569031, + "rewards/EvidenceHallucination/std": 0.431448757648468, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.6602855324745178, + "rewards/VideoAccuracy/std": 0.6216288208961487, + "step": 747, + "train_speed(iter/s)": 0.280518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 751.0, + "completions/mean_length": 550.2142944335938, + "completions/min_length": 373.0, + "entropy/max": 1.1484375, + "entropy/mean": 0.33203125, + "entropy/min": 0.138671875, + "epoch": 0.748, + "grad_norm": 1.0007753875117082, + "kl": 0.2119140625, + "learning_rate": 3.0306243134470667e-07, + "loss": 0.0021712270099669695, + "memory(GiB)": 137.02, + "reward": 2.0038554668426514, + "reward_std": 0.2609616219997406, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4727940559387207, + "rewards/EvidenceHallucination/std": 0.4271722733974457, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.4989348649978638, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7759631872177124, + "rewards/VideoAccuracy/std": 0.471916139125824, + "step": 748, + "train_speed(iter/s)": 0.275697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2212.0, + "completions/mean_length": 574.6904907226562, + "completions/min_length": 384.0, + "entropy/max": 1.0625, + "entropy/mean": 0.43359375, + "entropy/min": 0.1591796875, + "epoch": 0.749, + "grad_norm": 1.342246423324975, + "kl": 0.2333984375, + "learning_rate": 3.007902513019893e-07, + "loss": 0.0023996024392545223, + "memory(GiB)": 137.02, + "reward": 1.821750521659851, + "reward_std": 0.22269243001937866, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5023324489593506, + "rewards/EvidenceHallucination/std": 0.43607866764068604, + "rewards/Evidence_Num_Record/mean": 6.309524059295654, + "rewards/Evidence_Num_Record/std": 4.181772708892822, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.6593791246414185, + "rewards/VideoAccuracy/std": 0.38703349232673645, + "step": 749, + "train_speed(iter/s)": 0.27039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 759.0, + "completions/mean_length": 486.1190490722656, + "completions/min_length": 329.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.4296875, + "entropy/min": 0.259765625, + "epoch": 0.75, + "grad_norm": 1.206001882760331, + "kl": 0.267578125, + "learning_rate": 2.985251122936786e-07, + "loss": 0.0027073421515524387, + "memory(GiB)": 137.04, + "reward": 1.4515140056610107, + "reward_std": 0.20704594254493713, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27075642347335815, + "rewards/EvidenceHallucination/std": 0.4357222020626068, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.1160845756530762, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.30688655376434326, + "rewards/VideoAccuracy/std": 0.3046022355556488, + "step": 750, + "train_speed(iter/s)": 0.265822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1191.0, + "completions/mean_length": 571.4761962890625, + "completions/min_length": 343.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.287109375, + "entropy/min": 0.0927734375, + "epoch": 0.751, + "grad_norm": 0.9979638657062981, + "kl": 0.2314453125, + "learning_rate": 2.962670371296996e-07, + "loss": 0.0023694182746112347, + "memory(GiB)": 137.04, + "reward": 1.9292515516281128, + "reward_std": 0.17530755698680878, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.392671138048172, + "rewards/EvidenceHallucination/std": 0.4279833436012268, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.632993221282959, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6507173776626587, + "rewards/VideoAccuracy/std": 0.42830172181129456, + "step": 751, + "train_speed(iter/s)": 0.261773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 2198.0, + "completions/mean_length": 662.5952758789062, + "completions/min_length": 377.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.42578125, + "entropy/min": 0.1962890625, + "epoch": 0.752, + "grad_norm": 1.176046473783173, + "kl": 0.2275390625, + "learning_rate": 2.9401604854884357e-07, + "loss": 0.002370295813307166, + "memory(GiB)": 137.04, + "reward": 1.7728904485702515, + "reward_std": 0.13586002588272095, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49897584319114685, + "rewards/EvidenceHallucination/std": 0.41422632336616516, + "rewards/Evidence_Num_Record/mean": 7.238095283508301, + "rewards/Evidence_Num_Record/std": 6.495061874389648, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6397619247436523, + "rewards/VideoAccuracy/std": 0.4417704939842224, + "step": 752, + "train_speed(iter/s)": 0.257461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 689.0, + "completions/mean_length": 462.16668701171875, + "completions/min_length": 303.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.44140625, + "entropy/min": 0.279296875, + "epoch": 0.753, + "grad_norm": 1.4758477793811793, + "kl": 0.287109375, + "learning_rate": 2.9177216921854096e-07, + "loss": 0.0028979978524148464, + "memory(GiB)": 137.04, + "reward": 1.8449641466140747, + "reward_std": 0.3156450688838959, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5636225342750549, + "rewards/EvidenceHallucination/std": 0.43338704109191895, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.3477731943130493, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.6703349351882935, + "rewards/VideoAccuracy/std": 0.350552499294281, + "step": 753, + "train_speed(iter/s)": 0.254208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 915.0, + "completions/mean_length": 567.8333740234375, + "completions/min_length": 340.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.34765625, + "entropy/min": 0.16796875, + "epoch": 0.754, + "grad_norm": 1.0844028372997443, + "kl": 0.2470703125, + "learning_rate": 2.895354217346313e-07, + "loss": 0.002512303413823247, + "memory(GiB)": 137.04, + "reward": 2.207542896270752, + "reward_std": 0.20663592219352722, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7446616291999817, + "rewards/EvidenceHallucination/std": 0.3785170614719391, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 0.9947597980499268, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8919438123703003, + "rewards/VideoAccuracy/std": 0.4285814166069031, + "step": 754, + "train_speed(iter/s)": 0.250293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1079.0, + "completions/mean_length": 558.40478515625, + "completions/min_length": 346.0, + "entropy/max": 0.828125, + "entropy/mean": 0.443359375, + "entropy/min": 0.1435546875, + "epoch": 0.755, + "grad_norm": 1.21396233764057, + "kl": 0.234375, + "learning_rate": 2.873058286211374e-07, + "loss": 0.0024085480254143476, + "memory(GiB)": 137.04, + "reward": 1.996396780014038, + "reward_std": 0.046993501484394073, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6595722436904907, + "rewards/EvidenceHallucination/std": 0.3664809465408325, + "rewards/Evidence_Num_Record/mean": 5.61904764175415, + "rewards/Evidence_Num_Record/std": 2.9545531272888184, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.802577555179596, + "rewards/VideoAccuracy/std": 0.37356090545654297, + "step": 755, + "train_speed(iter/s)": 0.242949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 829.0, + "completions/mean_length": 498.5476379394531, + "completions/min_length": 271.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.447265625, + "entropy/min": 0.3125, + "epoch": 0.756, + "grad_norm": 1.2881935895616428, + "kl": 0.26171875, + "learning_rate": 2.8508341233003654e-07, + "loss": 0.002634369535371661, + "memory(GiB)": 137.04, + "reward": 1.6359120607376099, + "reward_std": 0.24966934323310852, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39463216066360474, + "rewards/EvidenceHallucination/std": 0.4662226140499115, + "rewards/Evidence_Num_Record/mean": 4.809524059295654, + "rewards/Evidence_Num_Record/std": 1.6265795230865479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.46174752712249756, + "rewards/VideoAccuracy/std": 0.3875961899757385, + "step": 756, + "train_speed(iter/s)": 0.240242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 625.0, + "completions/mean_length": 454.16668701171875, + "completions/min_length": 287.0, + "entropy/max": 0.8828125, + "entropy/mean": 0.43359375, + "entropy/min": 0.298828125, + "epoch": 0.757, + "grad_norm": 1.2744736247170079, + "kl": 0.291015625, + "learning_rate": 2.828681952410366e-07, + "loss": 0.0029427676927298307, + "memory(GiB)": 137.04, + "reward": 1.8057868480682373, + "reward_std": 0.22455629706382751, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33172181248664856, + "rewards/EvidenceHallucination/std": 0.4148164391517639, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.0406935214996338, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6394423842430115, + "rewards/VideoAccuracy/std": 0.7114621996879578, + "step": 757, + "train_speed(iter/s)": 0.237344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 844.0, + "completions/mean_length": 599.7619018554688, + "completions/min_length": 392.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.349609375, + "entropy/min": 0.154296875, + "epoch": 0.758, + "grad_norm": 0.9210292766563974, + "kl": 0.2138671875, + "learning_rate": 2.8066019966134904e-07, + "loss": 0.0021720444783568382, + "memory(GiB)": 137.04, + "reward": 1.9884105920791626, + "reward_std": 0.15015339851379395, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.542344868183136, + "rewards/EvidenceHallucination/std": 0.41900986433029175, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 2.176849842071533, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7466084361076355, + "rewards/VideoAccuracy/std": 0.5317025780677795, + "step": 758, + "train_speed(iter/s)": 0.232996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 639.0, + "completions/mean_length": 489.0, + "completions/min_length": 286.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.447265625, + "entropy/min": 0.298828125, + "epoch": 0.759, + "grad_norm": 1.3120394752787807, + "kl": 0.283203125, + "learning_rate": 2.784594478254645e-07, + "loss": 0.0028628166764974594, + "memory(GiB)": 137.04, + "reward": 1.803435206413269, + "reward_std": 0.11848029494285583, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5675522685050964, + "rewards/EvidenceHallucination/std": 0.45467978715896606, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 1.3251782655715942, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.604210615158081, + "rewards/VideoAccuracy/std": 0.36147215962409973, + "step": 759, + "train_speed(iter/s)": 0.230587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 826.0, + "completions/mean_length": 479.1428527832031, + "completions/min_length": 355.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.455078125, + "entropy/min": 0.330078125, + "epoch": 0.76, + "grad_norm": 1.0488478775950267, + "kl": 0.279296875, + "learning_rate": 2.762659618949298e-07, + "loss": 0.002819633577018976, + "memory(GiB)": 137.04, + "reward": 1.571948528289795, + "reward_std": 0.17096464335918427, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3613227605819702, + "rewards/EvidenceHallucination/std": 0.4511774778366089, + "rewards/Evidence_Num_Record/mean": 4.5, + "rewards/Evidence_Num_Record/std": 1.329771637916565, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.4330170452594757, + "rewards/VideoAccuracy/std": 0.5008947253227234, + "step": 760, + "train_speed(iter/s)": 0.227529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/mean_length": 536.5714111328125, + "completions/min_length": 387.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.283203125, + "entropy/min": 0.1103515625, + "epoch": 0.761, + "grad_norm": 1.0830395809094013, + "kl": 0.2314453125, + "learning_rate": 2.7407976395812414e-07, + "loss": 0.0023674783296883106, + "memory(GiB)": 137.04, + "reward": 2.1427016258239746, + "reward_std": 0.147226020693779, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5402226448059082, + "rewards/EvidenceHallucination/std": 0.4585988521575928, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 1.1323559284210205, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8394187688827515, + "rewards/VideoAccuracy/std": 0.40415194630622864, + "step": 761, + "train_speed(iter/s)": 0.224616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 924.0, + "completions/mean_length": 538.2619018554688, + "completions/min_length": 219.0, + "entropy/max": 1.2578125, + "entropy/mean": 0.53125, + "entropy/min": 0.283203125, + "epoch": 0.762, + "grad_norm": 1.3926846899697776, + "kl": 0.255859375, + "learning_rate": 2.719008760300359e-07, + "loss": 0.002642326056957245, + "memory(GiB)": 137.04, + "reward": 2.0062599182128906, + "reward_std": 0.2314552515745163, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.731963574886322, + "rewards/EvidenceHallucination/std": 0.3094104826450348, + "rewards/Evidence_Num_Record/mean": 5.38095235824585, + "rewards/Evidence_Num_Record/std": 1.974871277809143, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.7979623079299927, + "rewards/VideoAccuracy/std": 0.3034944236278534, + "step": 762, + "train_speed(iter/s)": 0.221321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 872.0, + "completions/mean_length": 538.7380981445312, + "completions/min_length": 352.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.427734375, + "entropy/min": 0.2265625, + "epoch": 0.763, + "grad_norm": 1.3743981821647915, + "kl": 0.27734375, + "learning_rate": 2.6972932005204265e-07, + "loss": 0.0028074365109205246, + "memory(GiB)": 137.04, + "reward": 1.5624735355377197, + "reward_std": 0.27830272912979126, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3662137985229492, + "rewards/EvidenceHallucination/std": 0.43138769268989563, + "rewards/Evidence_Num_Record/mean": 5.047619342803955, + "rewards/Evidence_Num_Record/std": 1.360637903213501, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.4273257553577423, + "rewards/VideoAccuracy/std": 0.41626739501953125, + "step": 763, + "train_speed(iter/s)": 0.218726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1159.0, + "completions/mean_length": 576.5, + "completions/min_length": 307.0, + "entropy/max": 0.53125, + "entropy/mean": 0.365234375, + "entropy/min": 0.17578125, + "epoch": 0.764, + "grad_norm": 1.1541825979084532, + "kl": 0.2431640625, + "learning_rate": 2.6756511789168924e-07, + "loss": 0.0024590210523456335, + "memory(GiB)": 137.04, + "reward": 2.1274592876434326, + "reward_std": 0.2759854793548584, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4846096634864807, + "rewards/EvidenceHallucination/std": 0.412736177444458, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.1189426183700562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.8686324954032898, + "rewards/VideoAccuracy/std": 0.41240498423576355, + "step": 764, + "train_speed(iter/s)": 0.215251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1158.0, + "completions/mean_length": 620.5714111328125, + "completions/min_length": 312.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.40234375, + "entropy/min": 0.126953125, + "epoch": 0.765, + "grad_norm": 1.2693623528387978, + "kl": 0.2197265625, + "learning_rate": 2.654082913424668e-07, + "loss": 0.002237423788756132, + "memory(GiB)": 137.04, + "reward": 2.0295865535736084, + "reward_std": 0.2693243622779846, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.497098833322525, + "rewards/EvidenceHallucination/std": 0.4144033193588257, + "rewards/Evidence_Num_Record/mean": 5.6666669845581055, + "rewards/Evidence_Num_Record/std": 2.3443410396575928, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.839690625667572, + "rewards/VideoAccuracy/std": 0.37392786145210266, + "step": 765, + "train_speed(iter/s)": 0.212781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 659.0, + "completions/mean_length": 502.26190185546875, + "completions/min_length": 340.0, + "entropy/max": 0.7265625, + "entropy/mean": 0.42578125, + "entropy/min": 0.28125, + "epoch": 0.766, + "grad_norm": 1.1298495045639512, + "kl": 0.259765625, + "learning_rate": 2.6325886212359496e-07, + "loss": 0.002813272178173065, + "memory(GiB)": 137.04, + "reward": 1.403793215751648, + "reward_std": 0.2295052856206894, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.14142382144927979, + "rewards/EvidenceHallucination/std": 0.32684361934661865, + "rewards/Evidence_Num_Record/mean": 4.6666669845581055, + "rewards/Evidence_Num_Record/std": 0.9794639348983765, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.24217505753040314, + "rewards/VideoAccuracy/std": 0.3239958584308624, + "step": 766, + "train_speed(iter/s)": 0.211075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 713.0, + "completions/mean_length": 469.8809509277344, + "completions/min_length": 286.0, + "entropy/max": 0.7421875, + "entropy/mean": 0.427734375, + "entropy/min": 0.310546875, + "epoch": 0.767, + "grad_norm": 0.9006828184671115, + "kl": 0.279296875, + "learning_rate": 2.611168518798026e-07, + "loss": 0.0030206870287656784, + "memory(GiB)": 137.04, + "reward": 1.644193172454834, + "reward_std": 0.11802645772695541, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3115190863609314, + "rewards/EvidenceHallucination/std": 0.42774006724357605, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.0548268556594849, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.48188936710357666, + "rewards/VideoAccuracy/std": 0.6383110284805298, + "step": 767, + "train_speed(iter/s)": 0.208967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 973.0, + "completions/mean_length": 651.3809814453125, + "completions/min_length": 319.0, + "entropy/max": 2.34375, + "entropy/mean": 0.337890625, + "entropy/min": 0.1279296875, + "epoch": 0.768, + "grad_norm": 1.0636513414616937, + "kl": 0.1962890625, + "learning_rate": 2.5898228218110827e-07, + "loss": 0.0020056653302162886, + "memory(GiB)": 137.04, + "reward": 2.059290647506714, + "reward_std": 0.394561767578125, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6037928462028503, + "rewards/EvidenceHallucination/std": 0.4175638258457184, + "rewards/Evidence_Num_Record/mean": 4.738095283508301, + "rewards/Evidence_Num_Record/std": 1.6536659002304077, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.805198609828949, + "rewards/VideoAccuracy/std": 0.457864910364151, + "step": 768, + "train_speed(iter/s)": 0.206188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/mean_length": 526.547607421875, + "completions/min_length": 320.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.4453125, + "entropy/min": 0.2099609375, + "epoch": 0.769, + "grad_norm": 1.2295031926056552, + "kl": 0.24609375, + "learning_rate": 2.568551745226056e-07, + "loss": 0.00250989873893559, + "memory(GiB)": 137.04, + "reward": 1.644586443901062, + "reward_std": 0.09407269209623337, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4121597111225128, + "rewards/EvidenceHallucination/std": 0.47044476866722107, + "rewards/Evidence_Num_Record/mean": 4.88095235824585, + "rewards/Evidence_Num_Record/std": 2.12077260017395, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.462154358625412, + "rewards/VideoAccuracy/std": 0.3949291408061981, + "step": 769, + "train_speed(iter/s)": 0.203477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 744.0, + "completions/mean_length": 463.4761962890625, + "completions/min_length": 306.0, + "entropy/max": 0.89453125, + "entropy/mean": 0.435546875, + "entropy/min": 0.291015625, + "epoch": 0.77, + "grad_norm": 0.932887645967211, + "kl": 0.28515625, + "learning_rate": 2.5473555032424534e-07, + "loss": 0.0028784458991140127, + "memory(GiB)": 137.04, + "reward": 1.218045711517334, + "reward_std": 0.11779654026031494, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.05753537639975548, + "rewards/EvidenceHallucination/std": 0.2104816436767578, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.1736558675765991, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.1398720145225525, + "rewards/VideoAccuracy/std": 0.22162491083145142, + "step": 770, + "train_speed(iter/s)": 0.201377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1152.0, + "completions/mean_length": 575.4761962890625, + "completions/min_length": 361.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.33203125, + "entropy/min": 0.1513671875, + "epoch": 0.771, + "grad_norm": 1.0178695989855275, + "kl": 0.21875, + "learning_rate": 2.526234309306193e-07, + "loss": 0.0022084922529757023, + "memory(GiB)": 137.04, + "reward": 1.9799977540969849, + "reward_std": 0.23305070400238037, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5267239809036255, + "rewards/EvidenceHallucination/std": 0.4495556652545929, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.1768676042556763, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430335700511932, + "rewards/VideoAccuracy/mean": 0.6794148683547974, + "rewards/VideoAccuracy/std": 0.38316991925239563, + "step": 771, + "train_speed(iter/s)": 0.198893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/mean_length": 572.40478515625, + "completions/min_length": 392.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.3984375, + "entropy/min": 0.2294921875, + "epoch": 0.772, + "grad_norm": 1.2712903116725374, + "kl": 0.265625, + "learning_rate": 2.505188376107461e-07, + "loss": 0.0026968661695718765, + "memory(GiB)": 137.04, + "reward": 1.6829097270965576, + "reward_std": 0.31071701645851135, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4338894784450531, + "rewards/EvidenceHallucination/std": 0.46595823764801025, + "rewards/Evidence_Num_Record/mean": 5.690476417541504, + "rewards/Evidence_Num_Record/std": 2.4741404056549072, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5294651389122009, + "rewards/VideoAccuracy/std": 0.47676700353622437, + "step": 772, + "train_speed(iter/s)": 0.196044 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/mean_length": 444.23809814453125, + "completions/min_length": 234.0, + "entropy/max": 0.6328125, + "entropy/mean": 0.41796875, + "entropy/min": 0.267578125, + "epoch": 0.773, + "grad_norm": 1.075729972846353, + "kl": 0.2734375, + "learning_rate": 2.4842179155785736e-07, + "loss": 0.002762245712801814, + "memory(GiB)": 137.04, + "reward": 1.5281906127929688, + "reward_std": 0.19979238510131836, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3526979386806488, + "rewards/EvidenceHallucination/std": 0.46093907952308655, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.2259297370910645, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.40527021884918213, + "rewards/VideoAccuracy/std": 0.40325766801834106, + "step": 773, + "train_speed(iter/s)": 0.193821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 601.0, + "completions/mean_length": 466.4761962890625, + "completions/min_length": 323.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.3671875, + "entropy/min": 0.1689453125, + "epoch": 0.774, + "grad_norm": 1.0876326363448923, + "kl": 0.267578125, + "learning_rate": 2.463323138891837e-07, + "loss": 0.0026955704670399427, + "memory(GiB)": 137.04, + "reward": 1.9218472242355347, + "reward_std": 0.24783822894096375, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4492851197719574, + "rewards/EvidenceHallucination/std": 0.43904009461402893, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.1384934186935425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.6700853109359741, + "rewards/VideoAccuracy/std": 0.5515148639678955, + "step": 774, + "train_speed(iter/s)": 0.192153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1202.0, + "completions/mean_length": 622.6428833007812, + "completions/min_length": 437.0, + "entropy/max": 1.359375, + "entropy/mean": 0.375, + "entropy/min": 0.12060546875, + "epoch": 0.775, + "grad_norm": 0.9770357562197043, + "kl": 0.2216796875, + "learning_rate": 2.4425042564574185e-07, + "loss": 0.002256374340504408, + "memory(GiB)": 137.04, + "reward": 1.7947856187820435, + "reward_std": 0.21613454818725586, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.419340044260025, + "rewards/EvidenceHallucination/std": 0.41037309169769287, + "rewards/Evidence_Num_Record/mean": 6.095238208770752, + "rewards/Evidence_Num_Record/std": 2.953373670578003, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.6109175682067871, + "rewards/VideoAccuracy/std": 0.46995970606803894, + "step": 775, + "train_speed(iter/s)": 0.189629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/mean_length": 501.71429443359375, + "completions/min_length": 281.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.435546875, + "entropy/min": 0.2119140625, + "epoch": 0.776, + "grad_norm": 1.3481529811925332, + "kl": 0.265625, + "learning_rate": 2.4217614779212315e-07, + "loss": 0.0026915818452835083, + "memory(GiB)": 137.04, + "reward": 1.8733131885528564, + "reward_std": 0.23146864771842957, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6218430399894714, + "rewards/EvidenceHallucination/std": 0.44950273633003235, + "rewards/Evidence_Num_Record/mean": 4.952381134033203, + "rewards/Evidence_Num_Record/std": 1.3784470558166504, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.6584683060646057, + "rewards/VideoAccuracy/std": 0.35335877537727356, + "step": 776, + "train_speed(iter/s)": 0.187409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 932.0, + "completions/mean_length": 449.1190490722656, + "completions/min_length": 219.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.408203125, + "entropy/min": 0.1875, + "epoch": 0.777, + "grad_norm": 1.3426598800181293, + "kl": 0.294921875, + "learning_rate": 2.4010950121628313e-07, + "loss": 0.002975872717797756, + "memory(GiB)": 137.04, + "reward": 1.9005649089813232, + "reward_std": 0.3881090581417084, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48913657665252686, + "rewards/EvidenceHallucination/std": 0.4540313482284546, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.3651119470596313, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.70273756980896, + "rewards/VideoAccuracy/std": 0.6419118642807007, + "step": 777, + "train_speed(iter/s)": 0.185488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 844.0, + "completions/mean_length": 608.5, + "completions/min_length": 324.0, + "entropy/max": 1.8125, + "entropy/mean": 0.37890625, + "entropy/min": 0.119140625, + "epoch": 0.778, + "grad_norm": 1.122146108228793, + "kl": 0.2001953125, + "learning_rate": 2.3805050672932925e-07, + "loss": 0.0020415023900568485, + "memory(GiB)": 137.04, + "reward": 2.269925594329834, + "reward_std": 0.07296941429376602, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7881034016609192, + "rewards/EvidenceHallucination/std": 0.22882558405399323, + "rewards/Evidence_Num_Record/mean": 5.142857074737549, + "rewards/Evidence_Num_Record/std": 2.6279122829437256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9789714813232422, + "rewards/VideoAccuracy/std": 0.22200588881969452, + "step": 778, + "train_speed(iter/s)": 0.182988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 794.0, + "completions/mean_length": 517.6428833007812, + "completions/min_length": 331.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.453125, + "entropy/min": 0.322265625, + "epoch": 0.779, + "grad_norm": 1.2723766867809578, + "kl": 0.251953125, + "learning_rate": 2.3599918506531336e-07, + "loss": 0.002551523270085454, + "memory(GiB)": 137.04, + "reward": 1.8165538311004639, + "reward_std": 0.2667812705039978, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5226373076438904, + "rewards/EvidenceHallucination/std": 0.4287099838256836, + "rewards/Evidence_Num_Record/mean": 5.11904764175415, + "rewards/Evidence_Num_Record/std": 1.6260437965393066, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.6644073128700256, + "rewards/VideoAccuracy/std": 0.41102248430252075, + "step": 779, + "train_speed(iter/s)": 0.181485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 641.0, + "completions/mean_length": 464.69049072265625, + "completions/min_length": 331.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.439453125, + "entropy/min": 0.31640625, + "epoch": 0.78, + "grad_norm": 1.2835722921065864, + "kl": 0.267578125, + "learning_rate": 2.339555568810221e-07, + "loss": 0.002697822405025363, + "memory(GiB)": 137.04, + "reward": 1.4789576530456543, + "reward_std": 0.3967013955116272, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.24236686527729034, + "rewards/EvidenceHallucination/std": 0.39292433857917786, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 0.8913052678108215, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.3638175427913666, + "rewards/VideoAccuracy/std": 0.5002002120018005, + "step": 780, + "train_speed(iter/s)": 0.179814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1041.0, + "completions/mean_length": 590.7857055664062, + "completions/min_length": 388.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.294921875, + "entropy/min": 0.1171875, + "epoch": 0.781, + "grad_norm": 0.9951126593890884, + "kl": 0.2041015625, + "learning_rate": 2.3191964275576803e-07, + "loss": 0.0020998376421630383, + "memory(GiB)": 137.04, + "reward": 2.2755444049835205, + "reward_std": 0.16309919953346252, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.647555947303772, + "rewards/EvidenceHallucination/std": 0.44328615069389343, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9750069975852966, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9507946968078613, + "rewards/VideoAccuracy/std": 0.459786981344223, + "step": 781, + "train_speed(iter/s)": 0.177398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 774.0, + "completions/mean_length": 501.76190185546875, + "completions/min_length": 373.0, + "entropy/max": 1.140625, + "entropy/mean": 0.52734375, + "entropy/min": 0.330078125, + "epoch": 0.782, + "grad_norm": 1.2211936154390168, + "kl": 0.27734375, + "learning_rate": 2.2989146319118425e-07, + "loss": 0.0028013205155730247, + "memory(GiB)": 137.04, + "reward": 1.6683571338653564, + "reward_std": 0.16860167682170868, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45266225934028625, + "rewards/EvidenceHallucination/std": 0.4449842572212219, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.0392975807189941, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5111579895019531, + "rewards/VideoAccuracy/std": 0.43782246112823486, + "step": 782, + "train_speed(iter/s)": 0.175883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 772.0, + "completions/mean_length": 487.8333435058594, + "completions/min_length": 303.0, + "entropy/max": 0.640625, + "entropy/mean": 0.439453125, + "entropy/min": 0.263671875, + "epoch": 0.783, + "grad_norm": 1.3299203101555763, + "kl": 0.267578125, + "learning_rate": 2.2787103861101653e-07, + "loss": 0.002707436680793762, + "memory(GiB)": 137.04, + "reward": 1.4210991859436035, + "reward_std": 0.37828224897384644, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2970438599586487, + "rewards/EvidenceHallucination/std": 0.4277191460132599, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.007521152496338, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.32835710048675537, + "rewards/VideoAccuracy/std": 0.42821741104125977, + "step": 783, + "train_speed(iter/s)": 0.174104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1081.0, + "completions/mean_length": 589.7142944335938, + "completions/min_length": 344.0, + "entropy/max": 0.70703125, + "entropy/mean": 0.375, + "entropy/min": 0.1533203125, + "epoch": 0.784, + "grad_norm": 1.3517030280414586, + "kl": 0.234375, + "learning_rate": 2.258583893609175e-07, + "loss": 0.0023501552641391754, + "memory(GiB)": 137.04, + "reward": 1.8731383085250854, + "reward_std": 0.27096807956695557, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5219821333885193, + "rewards/EvidenceHallucination/std": 0.4470759928226471, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 0.8006965517997742, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.6068370342254639, + "rewards/VideoAccuracy/std": 0.48197415471076965, + "step": 784, + "train_speed(iter/s)": 0.172356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 911.0, + "completions/mean_length": 534.90478515625, + "completions/min_length": 276.0, + "entropy/max": 0.6875, + "entropy/mean": 0.408203125, + "entropy/min": 0.138671875, + "epoch": 0.785, + "grad_norm": 1.1364964716258021, + "kl": 0.24609375, + "learning_rate": 2.2385353570824305e-07, + "loss": 0.002507873810827732, + "memory(GiB)": 137.04, + "reward": 1.8118720054626465, + "reward_std": 0.15822087228298187, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4680928885936737, + "rewards/EvidenceHallucination/std": 0.4309382140636444, + "rewards/Evidence_Num_Record/mean": 4.5, + "rewards/Evidence_Num_Record/std": 1.7976950407028198, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.6182535290718079, + "rewards/VideoAccuracy/std": 0.44967034459114075, + "step": 785, + "train_speed(iter/s)": 0.170848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 706.0, + "completions/mean_length": 454.8571472167969, + "completions/min_length": 345.0, + "entropy/max": 0.875, + "entropy/mean": 0.453125, + "entropy/min": 0.302734375, + "epoch": 0.786, + "grad_norm": 1.3404990771945693, + "kl": 0.267578125, + "learning_rate": 2.2185649784184747e-07, + "loss": 0.002728839172050357, + "memory(GiB)": 137.04, + "reward": 1.5887411832809448, + "reward_std": 0.15206003189086914, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4058372378349304, + "rewards/EvidenceHallucination/std": 0.4760162830352783, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.292343258857727, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.4504307508468628, + "rewards/VideoAccuracy/std": 0.46277645230293274, + "step": 786, + "train_speed(iter/s)": 0.169354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 681.0, + "completions/mean_length": 466.3809509277344, + "completions/min_length": 340.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.42578125, + "entropy/min": 0.224609375, + "epoch": 0.787, + "grad_norm": 1.1613612062790581, + "kl": 0.271484375, + "learning_rate": 2.1986729587187958e-07, + "loss": 0.0027181445620954037, + "memory(GiB)": 137.04, + "reward": 1.823914647102356, + "reward_std": 0.251088947057724, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4527112543582916, + "rewards/EvidenceHallucination/std": 0.4648779332637787, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 0.9055256843566895, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.638134241104126, + "rewards/VideoAccuracy/std": 0.6499177813529968, + "step": 787, + "train_speed(iter/s)": 0.168382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 907.0, + "completions/mean_length": 557.0, + "completions/min_length": 365.0, + "entropy/max": 0.91796875, + "entropy/mean": 0.34765625, + "entropy/min": 0.11572265625, + "epoch": 0.788, + "grad_norm": 1.0403974702132741, + "kl": 0.2236328125, + "learning_rate": 2.1788594982958086e-07, + "loss": 0.0023073283955454826, + "memory(GiB)": 137.04, + "reward": 2.395468235015869, + "reward_std": 0.1430952548980713, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6607549786567688, + "rewards/EvidenceHallucination/std": 0.40233632922172546, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 1.129983901977539, + "rewards/VideoAccuracy/std": 0.23416288197040558, + "step": 788, + "train_speed(iter/s)": 0.166278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 762.0, + "completions/mean_length": 502.0238037109375, + "completions/min_length": 354.0, + "entropy/max": 0.71875, + "entropy/mean": 0.46875, + "entropy/min": 0.310546875, + "epoch": 0.789, + "grad_norm": 1.0809163218849382, + "kl": 0.26171875, + "learning_rate": 2.1591247966708426e-07, + "loss": 0.002648875815793872, + "memory(GiB)": 137.04, + "reward": 1.705361008644104, + "reward_std": 0.22425346076488495, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5174151659011841, + "rewards/EvidenceHallucination/std": 0.4102669358253479, + "rewards/Evidence_Num_Record/mean": 4.690476417541504, + "rewards/Evidence_Num_Record/std": 1.7736291885375977, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5352112054824829, + "rewards/VideoAccuracy/std": 0.4187927544116974, + "step": 789, + "train_speed(iter/s)": 0.165003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 734.0, + "completions/mean_length": 478.8095397949219, + "completions/min_length": 337.0, + "entropy/max": 0.65625, + "entropy/mean": 0.439453125, + "entropy/min": 0.3359375, + "epoch": 0.79, + "grad_norm": 1.199193262453143, + "kl": 0.287109375, + "learning_rate": 2.139469052572127e-07, + "loss": 0.0028981564100831747, + "memory(GiB)": 137.04, + "reward": 1.6890815496444702, + "reward_std": 0.20440547168254852, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44685637950897217, + "rewards/EvidenceHallucination/std": 0.43943482637405396, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 1.1220521926879883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5330434441566467, + "rewards/VideoAccuracy/std": 0.4751872420310974, + "step": 790, + "train_speed(iter/s)": 0.163633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 823.0, + "completions/mean_length": 528.40478515625, + "completions/min_length": 359.0, + "entropy/max": 0.7265625, + "entropy/mean": 0.30078125, + "entropy/min": 0.1337890625, + "epoch": 0.791, + "grad_norm": 1.0779360237652633, + "kl": 0.2275390625, + "learning_rate": 2.1198924639327808e-07, + "loss": 0.00232085888274014, + "memory(GiB)": 137.04, + "reward": 2.4378058910369873, + "reward_std": 0.1298271268606186, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6932946443557739, + "rewards/EvidenceHallucination/std": 0.3891344368457794, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.7344991564750671, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0991469621658325, + "rewards/VideoAccuracy/std": 0.21373583376407623, + "step": 791, + "train_speed(iter/s)": 0.161713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/mean_length": 544.3095092773438, + "completions/min_length": 349.0, + "entropy/max": 0.93359375, + "entropy/mean": 0.47265625, + "entropy/min": 0.275390625, + "epoch": 0.792, + "grad_norm": 1.10974018722686, + "kl": 0.255859375, + "learning_rate": 2.1003952278888382e-07, + "loss": 0.002613186603412032, + "memory(GiB)": 137.04, + "reward": 1.7953269481658936, + "reward_std": 0.1835445612668991, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.56269371509552, + "rewards/EvidenceHallucination/std": 0.4327901303768158, + "rewards/Evidence_Num_Record/mean": 5.357142925262451, + "rewards/Evidence_Num_Record/std": 2.022090435028076, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6161215901374817, + "rewards/VideoAccuracy/std": 0.41941821575164795, + "step": 792, + "train_speed(iter/s)": 0.160502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 724.0, + "completions/mean_length": 425.3809509277344, + "completions/min_length": 282.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.451171875, + "entropy/min": 0.306640625, + "epoch": 0.793, + "grad_norm": 1.0212667730806406, + "kl": 0.291015625, + "learning_rate": 2.08097754077725e-07, + "loss": 0.00294006010517478, + "memory(GiB)": 137.04, + "reward": 1.322772741317749, + "reward_std": 0.10889124125242233, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.19463838636875153, + "rewards/EvidenceHallucination/std": 0.37754857540130615, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.2010449171066284, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.26003551483154297, + "rewards/VideoAccuracy/std": 0.3910863697528839, + "step": 793, + "train_speed(iter/s)": 0.158958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 962.0, + "completions/mean_length": 566.357177734375, + "completions/min_length": 351.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.349609375, + "entropy/min": 0.13671875, + "epoch": 0.794, + "grad_norm": 1.0006950576747762, + "kl": 0.25, + "learning_rate": 2.0616395981339073e-07, + "loss": 0.002716131042689085, + "memory(GiB)": 137.04, + "reward": 1.928626537322998, + "reward_std": 0.24823066592216492, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3988966643810272, + "rewards/EvidenceHallucination/std": 0.4528437554836273, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 0.9016696214675903, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6821804642677307, + "rewards/VideoAccuracy/std": 0.5712240934371948, + "step": 794, + "train_speed(iter/s)": 0.157512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 883.0, + "completions/mean_length": 594.1428833007812, + "completions/min_length": 375.0, + "entropy/max": 1.375, + "entropy/mean": 0.44140625, + "entropy/min": 0.1923828125, + "epoch": 0.795, + "grad_norm": 1.0493805498050126, + "kl": 0.2158203125, + "learning_rate": 2.042381594691678e-07, + "loss": 0.00218667252920568, + "memory(GiB)": 137.04, + "reward": 2.0298008918762207, + "reward_std": 0.08417399227619171, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5516841411590576, + "rewards/EvidenceHallucination/std": 0.43587151169776917, + "rewards/Evidence_Num_Record/mean": 4.88095235824585, + "rewards/Evidence_Num_Record/std": 1.9025321006774902, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8194637298583984, + "rewards/VideoAccuracy/std": 0.2984185516834259, + "step": 795, + "train_speed(iter/s)": 0.156147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 650.0, + "completions/mean_length": 449.69049072265625, + "completions/min_length": 300.0, + "entropy/max": 0.734375, + "entropy/mean": 0.4375, + "entropy/min": 0.267578125, + "epoch": 0.796, + "grad_norm": 1.1736701624981345, + "kl": 0.291015625, + "learning_rate": 2.0232037243784472e-07, + "loss": 0.0029487479478120804, + "memory(GiB)": 137.04, + "reward": 1.5112559795379639, + "reward_std": 0.11511891335248947, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3990863263607025, + "rewards/EvidenceHallucination/std": 0.4522339999675751, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.9358023405075073, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.39810535311698914, + "rewards/VideoAccuracy/std": 0.3985377550125122, + "step": 796, + "train_speed(iter/s)": 0.154814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 595.0, + "completions/mean_length": 446.90478515625, + "completions/min_length": 339.0, + "entropy/max": 0.5625, + "entropy/mean": 0.41015625, + "entropy/min": 0.30859375, + "epoch": 0.797, + "grad_norm": 1.2352760996533378, + "kl": 0.306640625, + "learning_rate": 2.0041061803151505e-07, + "loss": 0.0030723009258508682, + "memory(GiB)": 137.04, + "reward": 2.0463526248931885, + "reward_std": 0.21899209916591644, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5365279912948608, + "rewards/EvidenceHallucination/std": 0.4506574273109436, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.8611501455307007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.8438087105751038, + "rewards/VideoAccuracy/std": 0.6808693408966064, + "step": 797, + "train_speed(iter/s)": 0.153683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/mean_length": 594.5952758789062, + "completions/min_length": 345.0, + "entropy/max": 0.6796875, + "entropy/mean": 0.291015625, + "entropy/min": 0.1552734375, + "epoch": 0.798, + "grad_norm": 0.8299365566704086, + "kl": 0.2119140625, + "learning_rate": 1.985089154813846e-07, + "loss": 0.002190345199778676, + "memory(GiB)": 137.04, + "reward": 1.9640860557556152, + "reward_std": 0.22710314393043518, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5165387392044067, + "rewards/EvidenceHallucination/std": 0.45965853333473206, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 2.264711856842041, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.7322068810462952, + "rewards/VideoAccuracy/std": 0.5266538262367249, + "step": 798, + "train_speed(iter/s)": 0.152043 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/mean_length": 482.5714416503906, + "completions/min_length": 281.0, + "entropy/max": 1.0234375, + "entropy/mean": 0.462890625, + "entropy/min": 0.263671875, + "epoch": 0.799, + "grad_norm": 1.3890991623959492, + "kl": 0.267578125, + "learning_rate": 1.9661528393757742e-07, + "loss": 0.0027191739063709974, + "memory(GiB)": 137.04, + "reward": 1.8365747928619385, + "reward_std": 0.30120939016342163, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6418147683143616, + "rewards/EvidenceHallucination/std": 0.4065225124359131, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.2598047256469727, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6415451765060425, + "rewards/VideoAccuracy/std": 0.4152042269706726, + "step": 799, + "train_speed(iter/s)": 0.15078 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 797.0, + "completions/mean_length": 475.1428527832031, + "completions/min_length": 336.0, + "entropy/max": 0.59375, + "entropy/mean": 0.447265625, + "entropy/min": 0.287109375, + "epoch": 0.8, + "grad_norm": 1.034196467948775, + "kl": 0.283203125, + "learning_rate": 1.9472974246894136e-07, + "loss": 0.002842884510755539, + "memory(GiB)": 137.04, + "reward": 1.516119122505188, + "reward_std": 0.16130468249320984, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29011085629463196, + "rewards/EvidenceHallucination/std": 0.4407220482826233, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.6803189516067505, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.42476359009742737, + "rewards/VideoAccuracy/std": 0.5281235575675964, + "step": 800, + "train_speed(iter/s)": 0.148553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 936.0, + "completions/mean_length": 558.3333740234375, + "completions/min_length": 369.0, + "entropy/max": 0.578125, + "entropy/mean": 0.298828125, + "entropy/min": 0.1337890625, + "epoch": 0.801, + "grad_norm": 1.0164229415578283, + "kl": 0.2314453125, + "learning_rate": 1.9285231006285853e-07, + "loss": 0.0023471282329410315, + "memory(GiB)": 137.04, + "reward": 2.2122294902801514, + "reward_std": 0.16193059086799622, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5297513008117676, + "rewards/EvidenceHallucination/std": 0.40756309032440186, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.292343258857727, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9110410809516907, + "rewards/VideoAccuracy/std": 0.4661368727684021, + "step": 801, + "train_speed(iter/s)": 0.146065 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/mean_length": 506.69049072265625, + "completions/min_length": 304.0, + "entropy/max": 1.3828125, + "entropy/mean": 0.51171875, + "entropy/min": 0.275390625, + "epoch": 0.802, + "grad_norm": 1.0871509073724928, + "kl": 0.267578125, + "learning_rate": 1.9098300562505264e-07, + "loss": 0.0027364350389689207, + "memory(GiB)": 137.04, + "reward": 1.5814732313156128, + "reward_std": 0.09867032617330551, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37604111433029175, + "rewards/EvidenceHallucination/std": 0.4459204375743866, + "rewards/Evidence_Num_Record/mean": 5.1666669845581055, + "rewards/Evidence_Num_Record/std": 1.5759884119033813, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.4491220712661743, + "rewards/VideoAccuracy/std": 0.4343416392803192, + "step": 802, + "train_speed(iter/s)": 0.145075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 572.0, + "completions/mean_length": 434.69049072265625, + "completions/min_length": 246.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.41015625, + "entropy/min": 0.255859375, + "epoch": 0.803, + "grad_norm": 1.2846279119399278, + "kl": 0.294921875, + "learning_rate": 1.89121847979398e-07, + "loss": 0.002950625028461218, + "memory(GiB)": 137.04, + "reward": 1.519728422164917, + "reward_std": 0.18460945785045624, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3646612763404846, + "rewards/EvidenceHallucination/std": 0.47237429022789, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.8873874545097351, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4467962384223938, + "rewards/VideoAccuracy/std": 0.4420500099658966, + "step": 803, + "train_speed(iter/s)": 0.143886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 593.0, + "completions/mean_length": 462.0714416503906, + "completions/min_length": 382.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.359375, + "entropy/min": 0.1513671875, + "epoch": 0.804, + "grad_norm": 1.3904429529862117, + "kl": 0.26171875, + "learning_rate": 1.8726885586773211e-07, + "loss": 0.0026322491466999054, + "memory(GiB)": 137.04, + "reward": 2.227130651473999, + "reward_std": 0.16979330778121948, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6671773195266724, + "rewards/EvidenceHallucination/std": 0.41588160395622253, + "rewards/Evidence_Num_Record/mean": 3.5714285373687744, + "rewards/Evidence_Num_Record/std": 0.8305994868278503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9317903518676758, + "rewards/VideoAccuracy/std": 0.3766253590583801, + "step": 804, + "train_speed(iter/s)": 0.142855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1068.0, + "completions/mean_length": 504.40478515625, + "completions/min_length": 308.0, + "entropy/max": 2.453125, + "entropy/mean": 0.50390625, + "entropy/min": 0.1748046875, + "epoch": 0.805, + "grad_norm": 1.2911063113332604, + "kl": 0.2451171875, + "learning_rate": 1.8542404794966427e-07, + "loss": 0.002523736096918583, + "memory(GiB)": 137.04, + "reward": 2.2841567993164062, + "reward_std": 0.05647649988532066, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8305041790008545, + "rewards/EvidenceHallucination/std": 0.14656081795692444, + "rewards/Evidence_Num_Record/mean": 4.714285850524902, + "rewards/Evidence_Num_Record/std": 2.1559414863586426, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 1.022817850112915, + "rewards/VideoAccuracy/std": 0.2014765441417694, + "step": 805, + "train_speed(iter/s)": 0.141462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 786.0, + "completions/mean_length": 492.19049072265625, + "completions/min_length": 349.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.447265625, + "entropy/min": 0.2412109375, + "epoch": 0.806, + "grad_norm": 1.0349966850865397, + "kl": 0.263671875, + "learning_rate": 1.8358744280239048e-07, + "loss": 0.0030731498263776302, + "memory(GiB)": 137.04, + "reward": 1.6029757261276245, + "reward_std": 0.09720693528652191, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37939584255218506, + "rewards/EvidenceHallucination/std": 0.4710071086883545, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.1699390411376953, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.4651917517185211, + "rewards/VideoAccuracy/std": 0.4290243685245514, + "step": 806, + "train_speed(iter/s)": 0.140522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 710.0, + "completions/mean_length": 455.69049072265625, + "completions/min_length": 287.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.40234375, + "entropy/min": 0.2392578125, + "epoch": 0.807, + "grad_norm": 1.2630643967050652, + "kl": 0.279296875, + "learning_rate": 1.8175905892050348e-07, + "loss": 0.0028256457298994064, + "memory(GiB)": 137.04, + "reward": 2.0943901538848877, + "reward_std": 0.21325063705444336, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6793032288551331, + "rewards/EvidenceHallucination/std": 0.4282911419868469, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 1.1251531839370728, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.8632914423942566, + "rewards/VideoAccuracy/std": 0.5636486411094666, + "step": 807, + "train_speed(iter/s)": 0.138582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1039.0, + "completions/mean_length": 619.952392578125, + "completions/min_length": 344.0, + "entropy/max": 0.99609375, + "entropy/mean": 0.369140625, + "entropy/min": 0.1064453125, + "epoch": 0.808, + "grad_norm": 0.9905238036154161, + "kl": 0.1982421875, + "learning_rate": 1.7993891471580892e-07, + "loss": 0.0020271213725209236, + "memory(GiB)": 137.04, + "reward": 1.9375890493392944, + "reward_std": 0.13439425826072693, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5044342279434204, + "rewards/EvidenceHallucination/std": 0.4065171182155609, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 1.2623374462127686, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7033689618110657, + "rewards/VideoAccuracy/std": 0.3760945200920105, + "step": 808, + "train_speed(iter/s)": 0.137518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 975.0, + "completions/mean_length": 511.9761962890625, + "completions/min_length": 315.0, + "entropy/max": 1.2265625, + "entropy/mean": 0.42578125, + "entropy/min": 0.2197265625, + "epoch": 0.809, + "grad_norm": 1.2915911693627269, + "kl": 0.251953125, + "learning_rate": 1.78127028517139e-07, + "loss": 0.002545798197388649, + "memory(GiB)": 137.04, + "reward": 1.8296865224838257, + "reward_std": 0.27932512760162354, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4577580392360687, + "rewards/EvidenceHallucination/std": 0.4566575288772583, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.060591697692871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6714683175086975, + "rewards/VideoAccuracy/std": 0.4044560194015503, + "step": 809, + "train_speed(iter/s)": 0.13566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 591.0, + "completions/mean_length": 456.6428527832031, + "completions/min_length": 335.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.443359375, + "entropy/min": 0.318359375, + "epoch": 0.81, + "grad_norm": 1.2097038316865059, + "kl": 0.29296875, + "learning_rate": 1.763234185701673e-07, + "loss": 0.0029480045195668936, + "memory(GiB)": 137.04, + "reward": 1.1652276515960693, + "reward_std": 0.1880207657814026, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.05281609669327736, + "rewards/EvidenceHallucination/std": 0.19776032865047455, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.6357524394989014, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.12133102118968964, + "rewards/VideoAccuracy/std": 0.2569405734539032, + "step": 810, + "train_speed(iter/s)": 0.135135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/mean_length": 489.69049072265625, + "completions/min_length": 341.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.30078125, + "entropy/min": 0.1435546875, + "epoch": 0.811, + "grad_norm": 1.0259340548008493, + "kl": 0.234375, + "learning_rate": 1.7452810303722598e-07, + "loss": 0.0023717356380075216, + "memory(GiB)": 137.04, + "reward": 2.467923641204834, + "reward_std": 0.0862434059381485, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7541826367378235, + "rewards/EvidenceHallucination/std": 0.36006125807762146, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.525759220123291, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9047619104385376, + "rewards/HonestTime/std": 0.297101765871048, + "rewards/VideoAccuracy/mean": 1.1361346244812012, + "rewards/VideoAccuracy/std": 0.4355044364929199, + "step": 811, + "train_speed(iter/s)": 0.134143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 913.0, + "completions/mean_length": 519.2619018554688, + "completions/min_length": 311.0, + "entropy/max": 1.515625, + "entropy/mean": 0.515625, + "entropy/min": 0.287109375, + "epoch": 0.812, + "grad_norm": 1.4005353557473488, + "kl": 0.255859375, + "learning_rate": 1.7274109999712294e-07, + "loss": 0.002595985308289528, + "memory(GiB)": 137.04, + "reward": 1.8176084756851196, + "reward_std": 0.33944687247276306, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.556121826171875, + "rewards/EvidenceHallucination/std": 0.41618698835372925, + "rewards/Evidence_Num_Record/mean": 5.023809432983398, + "rewards/Evidence_Num_Record/std": 1.7034841775894165, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6730506420135498, + "rewards/VideoAccuracy/std": 0.4331187307834625, + "step": 812, + "train_speed(iter/s)": 0.133025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 672.0, + "completions/mean_length": 459.1190490722656, + "completions/min_length": 306.0, + "entropy/max": 0.734375, + "entropy/mean": 0.439453125, + "entropy/min": 0.294921875, + "epoch": 0.813, + "grad_norm": 1.1449873713008976, + "kl": 0.259765625, + "learning_rate": 1.7096242744495838e-07, + "loss": 0.0026133707724511623, + "memory(GiB)": 137.04, + "reward": 1.391340970993042, + "reward_std": 0.20070883631706238, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.27407148480415344, + "rewards/EvidenceHallucination/std": 0.4221936762332916, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 1.1313297748565674, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.33652666211128235, + "rewards/VideoAccuracy/std": 0.4078528881072998, + "step": 813, + "train_speed(iter/s)": 0.13225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/mean_length": 492.4761962890625, + "completions/min_length": 281.0, + "entropy/max": 0.46875, + "entropy/mean": 0.341796875, + "entropy/min": 0.1455078125, + "epoch": 0.814, + "grad_norm": 1.1441413979379622, + "kl": 0.25390625, + "learning_rate": 1.6919210329194534e-07, + "loss": 0.0025603468529880047, + "memory(GiB)": 137.04, + "reward": 2.121767282485962, + "reward_std": 0.18712137639522552, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5634064674377441, + "rewards/EvidenceHallucination/std": 0.4153032898902893, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.6595144867897034, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8424190282821655, + "rewards/VideoAccuracy/std": 0.4346762001514435, + "step": 814, + "train_speed(iter/s)": 0.131271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 921.0, + "completions/mean_length": 591.2142944335938, + "completions/min_length": 274.0, + "entropy/max": 1.7109375, + "entropy/mean": 0.490234375, + "entropy/min": 0.1640625, + "epoch": 0.815, + "grad_norm": 1.1221581338540028, + "kl": 0.22265625, + "learning_rate": 1.674301453652287e-07, + "loss": 0.0022738249972462654, + "memory(GiB)": 137.04, + "reward": 1.8590971231460571, + "reward_std": 0.3513486385345459, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3494276702404022, + "rewards/EvidenceHallucination/std": 0.4168657064437866, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 2.0952911376953125, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.6892116069793701, + "rewards/VideoAccuracy/std": 0.5239997506141663, + "step": 815, + "train_speed(iter/s)": 0.130005 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 826.0, + "completions/mean_length": 473.3809509277344, + "completions/min_length": 309.0, + "entropy/max": 0.8203125, + "entropy/mean": 0.4453125, + "entropy/min": 0.294921875, + "epoch": 0.816, + "grad_norm": 1.2570679726908929, + "kl": 0.2578125, + "learning_rate": 1.6567657140770474e-07, + "loss": 0.002602183260023594, + "memory(GiB)": 137.04, + "reward": 1.8965072631835938, + "reward_std": 0.12467285245656967, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6578428149223328, + "rewards/EvidenceHallucination/std": 0.3929106295108795, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.9927144646644592, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.7649388313293457, + "rewards/VideoAccuracy/std": 0.4106225371360779, + "step": 816, + "train_speed(iter/s)": 0.129199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1051.0, + "completions/mean_length": 488.5714416503906, + "completions/min_length": 293.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.375, + "entropy/min": 0.1494140625, + "epoch": 0.817, + "grad_norm": 1.2585392025221378, + "kl": 0.302734375, + "learning_rate": 1.6393139907784403e-07, + "loss": 0.0030637290328741074, + "memory(GiB)": 137.04, + "reward": 1.8628321886062622, + "reward_std": 0.21498841047286987, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49281394481658936, + "rewards/EvidenceHallucination/std": 0.4585222601890564, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.3216679096221924, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6642693877220154, + "rewards/VideoAccuracy/std": 0.5453062057495117, + "step": 817, + "train_speed(iter/s)": 0.128316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 833.0, + "completions/mean_length": 528.0238037109375, + "completions/min_length": 349.0, + "entropy/max": 0.9296875, + "entropy/mean": 0.30859375, + "entropy/min": 0.12890625, + "epoch": 0.818, + "grad_norm": 0.8079230123575736, + "kl": 0.2255859375, + "learning_rate": 1.621946459495127e-07, + "loss": 0.002292902674525976, + "memory(GiB)": 137.04, + "reward": 2.204044818878174, + "reward_std": 0.053180523216724396, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6027761101722717, + "rewards/EvidenceHallucination/std": 0.4402962923049927, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.092950701713562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9501564502716064, + "rewards/VideoAccuracy/std": 0.4471309781074524, + "step": 818, + "train_speed(iter/s)": 0.127354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 827.0, + "completions/mean_length": 479.0952453613281, + "completions/min_length": 358.0, + "entropy/max": 0.8515625, + "entropy/mean": 0.44140625, + "entropy/min": 0.2578125, + "epoch": 0.819, + "grad_norm": 1.070203931568812, + "kl": 0.267578125, + "learning_rate": 1.6046632951179507e-07, + "loss": 0.0027072113007307053, + "memory(GiB)": 137.04, + "reward": 1.77433180809021, + "reward_std": 0.03583128750324249, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6151052713394165, + "rewards/EvidenceHallucination/std": 0.46708250045776367, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.1526871919631958, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6179774403572083, + "rewards/VideoAccuracy/std": 0.4577104449272156, + "step": 819, + "train_speed(iter/s)": 0.126477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 632.0, + "completions/mean_length": 444.0476379394531, + "completions/min_length": 229.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.4296875, + "entropy/min": 0.224609375, + "epoch": 0.82, + "grad_norm": 1.1627853387496583, + "kl": 0.271484375, + "learning_rate": 1.5874646716881868e-07, + "loss": 0.002706526778638363, + "memory(GiB)": 137.04, + "reward": 1.6316003799438477, + "reward_std": 0.21008215844631195, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40796494483947754, + "rewards/EvidenceHallucination/std": 0.4385842978954315, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.8207527995109558, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5166741013526917, + "rewards/VideoAccuracy/std": 0.5705318450927734, + "step": 820, + "train_speed(iter/s)": 0.125693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/mean_length": 559.547607421875, + "completions/min_length": 394.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.3046875, + "entropy/min": 0.140625, + "epoch": 0.821, + "grad_norm": 1.0176153711647942, + "kl": 0.2109375, + "learning_rate": 1.5703507623957847e-07, + "loss": 0.0021406924352049828, + "memory(GiB)": 137.04, + "reward": 2.3748960494995117, + "reward_std": 0.22574886679649353, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5159128904342651, + "rewards/EvidenceHallucination/std": 0.4431808590888977, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9997095465660095, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 1.0764751434326172, + "rewards/VideoAccuracy/std": 0.47842711210250854, + "step": 821, + "train_speed(iter/s)": 0.124855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1140.0, + "completions/mean_length": 550.8333740234375, + "completions/min_length": 338.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.4453125, + "entropy/min": 0.2431640625, + "epoch": 0.822, + "grad_norm": 1.0987707198826209, + "kl": 0.2431640625, + "learning_rate": 1.5533217395776188e-07, + "loss": 0.002876720856875181, + "memory(GiB)": 137.04, + "reward": 1.8405925035476685, + "reward_std": 0.22467979788780212, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5890406370162964, + "rewards/EvidenceHallucination/std": 0.43359968066215515, + "rewards/Evidence_Num_Record/mean": 5.476190567016602, + "rewards/Evidence_Num_Record/std": 2.265737295150757, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6894509196281433, + "rewards/VideoAccuracy/std": 0.4250394403934479, + "step": 822, + "train_speed(iter/s)": 0.124024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/mean_length": 456.7857360839844, + "completions/min_length": 329.0, + "entropy/max": 0.578125, + "entropy/mean": 0.44140625, + "entropy/min": 0.2734375, + "epoch": 0.823, + "grad_norm": 1.4579151535626775, + "kl": 0.291015625, + "learning_rate": 1.536377774715757e-07, + "loss": 0.0029377522878348827, + "memory(GiB)": 137.04, + "reward": 1.6815420389175415, + "reward_std": 0.44127708673477173, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5124199986457825, + "rewards/EvidenceHallucination/std": 0.4615868031978607, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.0169869661331177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.579058051109314, + "rewards/VideoAccuracy/std": 0.4553752541542053, + "step": 823, + "train_speed(iter/s)": 0.123081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 934.0, + "completions/mean_length": 484.0952453613281, + "completions/min_length": 354.0, + "entropy/max": 0.51171875, + "entropy/mean": 0.36328125, + "entropy/min": 0.10205078125, + "epoch": 0.824, + "grad_norm": 1.2073259135000711, + "kl": 0.255859375, + "learning_rate": 1.5195190384357404e-07, + "loss": 0.00258562620729208, + "memory(GiB)": 137.04, + "reward": 2.0996549129486084, + "reward_std": 0.11016413569450378, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5467841029167175, + "rewards/EvidenceHallucination/std": 0.44064101576805115, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.7501451969146729, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8236314654350281, + "rewards/VideoAccuracy/std": 0.3638036251068115, + "step": 824, + "train_speed(iter/s)": 0.122403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 877.0, + "completions/mean_length": 534.8095092773438, + "completions/min_length": 385.0, + "entropy/max": 1.609375, + "entropy/mean": 0.453125, + "entropy/min": 0.1767578125, + "epoch": 0.825, + "grad_norm": 1.0607292377664361, + "kl": 0.2314453125, + "learning_rate": 1.5027457005048572e-07, + "loss": 0.0023629399947822094, + "memory(GiB)": 137.04, + "reward": 1.9367122650146484, + "reward_std": 0.22675660252571106, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5748409032821655, + "rewards/EvidenceHallucination/std": 0.40622395277023315, + "rewards/Evidence_Num_Record/mean": 4.476190567016602, + "rewards/Evidence_Num_Record/std": 1.7283587455749512, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.7217440009117126, + "rewards/VideoAccuracy/std": 0.4476911723613739, + "step": 825, + "train_speed(iter/s)": 0.121546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 833.0, + "completions/mean_length": 466.5476379394531, + "completions/min_length": 355.0, + "entropy/max": 0.828125, + "entropy/mean": 0.4453125, + "entropy/min": 0.25390625, + "epoch": 0.826, + "grad_norm": 1.2128384339632001, + "kl": 0.26171875, + "learning_rate": 1.486057929830431e-07, + "loss": 0.002626287518069148, + "memory(GiB)": 137.04, + "reward": 1.6029353141784668, + "reward_std": 0.28161782026290894, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4569338858127594, + "rewards/EvidenceHallucination/std": 0.4684075713157654, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.6957966089248657, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5115485787391663, + "rewards/VideoAccuracy/std": 0.4963649809360504, + "step": 826, + "train_speed(iter/s)": 0.120613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 650.0, + "completions/mean_length": 449.8095397949219, + "completions/min_length": 309.0, + "entropy/max": 0.8359375, + "entropy/mean": 0.466796875, + "entropy/min": 0.302734375, + "epoch": 0.827, + "grad_norm": 1.2506186078462593, + "kl": 0.287109375, + "learning_rate": 1.469455894458129e-07, + "loss": 0.002896510064601898, + "memory(GiB)": 137.04, + "reward": 1.549792766571045, + "reward_std": 0.17999663949012756, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2444106489419937, + "rewards/EvidenceHallucination/std": 0.39539089798927307, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 0.9770894646644592, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.40091055631637573, + "rewards/VideoAccuracy/std": 0.5077887773513794, + "step": 827, + "train_speed(iter/s)": 0.120254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 823.0, + "completions/mean_length": 586.357177734375, + "completions/min_length": 373.0, + "entropy/max": 0.890625, + "entropy/mean": 0.30078125, + "entropy/min": 0.1455078125, + "epoch": 0.828, + "grad_norm": 1.0323021294234873, + "kl": 0.203125, + "learning_rate": 1.4529397615702654e-07, + "loss": 0.002064808737486601, + "memory(GiB)": 137.04, + "reward": 2.132803201675415, + "reward_std": 0.2657012939453125, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6109729409217834, + "rewards/EvidenceHallucination/std": 0.4012887179851532, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.916046142578125, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8772752285003662, + "rewards/VideoAccuracy/std": 0.4796368479728699, + "step": 828, + "train_speed(iter/s)": 0.118909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 819.0, + "completions/mean_length": 471.8095397949219, + "completions/min_length": 284.0, + "entropy/max": 0.7734375, + "entropy/mean": 0.4921875, + "entropy/min": 0.3515625, + "epoch": 0.829, + "grad_norm": 1.2255006294837825, + "kl": 0.26953125, + "learning_rate": 1.4365096974841106e-07, + "loss": 0.002734632696956396, + "memory(GiB)": 137.04, + "reward": 1.8650703430175781, + "reward_std": 0.21632973849773407, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6693183183670044, + "rewards/EvidenceHallucination/std": 0.41198858618736267, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 1.5469801425933838, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.7026352882385254, + "rewards/VideoAccuracy/std": 0.4336860179901123, + "step": 829, + "train_speed(iter/s)": 0.118299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 655.0, + "completions/mean_length": 462.8571472167969, + "completions/min_length": 286.0, + "entropy/max": 0.8359375, + "entropy/mean": 0.421875, + "entropy/min": 0.2578125, + "epoch": 0.83, + "grad_norm": 1.2870548016908077, + "kl": 0.283203125, + "learning_rate": 1.4201658676502293e-07, + "loss": 0.0028648152947425842, + "memory(GiB)": 137.04, + "reward": 1.3752903938293457, + "reward_std": 0.33857226371765137, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21359898149967194, + "rewards/EvidenceHallucination/std": 0.38932594656944275, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.9122345447540283, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.2992372214794159, + "rewards/VideoAccuracy/std": 0.3674658536911011, + "step": 830, + "train_speed(iter/s)": 0.11768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 792.0, + "completions/mean_length": 542.357177734375, + "completions/min_length": 388.0, + "entropy/max": 0.5, + "entropy/mean": 0.28515625, + "entropy/min": 0.150390625, + "epoch": 0.831, + "grad_norm": 0.9540566926721836, + "kl": 0.2197265625, + "learning_rate": 1.4039084366508092e-07, + "loss": 0.0022365141194313765, + "memory(GiB)": 137.04, + "reward": 2.3009817600250244, + "reward_std": 0.11945509910583496, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6476714611053467, + "rewards/EvidenceHallucination/std": 0.42851969599723816, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.7904775738716125, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.9762094020843506, + "rewards/VideoAccuracy/std": 0.3568113446235657, + "step": 831, + "train_speed(iter/s)": 0.11708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 901.0, + "completions/mean_length": 488.5238037109375, + "completions/min_length": 262.0, + "entropy/max": 0.8515625, + "entropy/mean": 0.482421875, + "entropy/min": 0.232421875, + "epoch": 0.832, + "grad_norm": 0.9672332619213181, + "kl": 0.275390625, + "learning_rate": 1.3877375681979942e-07, + "loss": 0.002822866663336754, + "memory(GiB)": 137.04, + "reward": 1.3189421892166138, + "reward_std": 0.1224055141210556, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22665101289749146, + "rewards/EvidenceHallucination/std": 0.3877301812171936, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.5002902746200562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777005434036255, + "rewards/VideoAccuracy/mean": 0.249802365899086, + "rewards/VideoAccuracy/std": 0.40132516622543335, + "step": 832, + "train_speed(iter/s)": 0.115738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 560.0, + "completions/mean_length": 428.7857360839844, + "completions/min_length": 232.0, + "entropy/max": 0.76171875, + "entropy/mean": 0.423828125, + "entropy/min": 0.265625, + "epoch": 0.833, + "grad_norm": 1.2916113779099452, + "kl": 0.294921875, + "learning_rate": 1.3716534251322544e-07, + "loss": 0.0029611135832965374, + "memory(GiB)": 137.36, + "reward": 1.519522786140442, + "reward_std": 0.27905896306037903, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35443753004074097, + "rewards/EvidenceHallucination/std": 0.4601483643054962, + "rewards/Evidence_Num_Record/mean": 3.809523820877075, + "rewards/Evidence_Num_Record/std": 0.6339229345321655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.41530200839042664, + "rewards/VideoAccuracy/std": 0.44496962428092957, + "step": 833, + "train_speed(iter/s)": 0.11507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 627.0, + "completions/mean_length": 469.66668701171875, + "completions/min_length": 345.0, + "entropy/max": 0.81640625, + "entropy/mean": 0.3828125, + "entropy/min": 0.1708984375, + "epoch": 0.834, + "grad_norm": 1.314431795447688, + "kl": 0.259765625, + "learning_rate": 1.3556561694207335e-07, + "loss": 0.0026252754032611847, + "memory(GiB)": 137.36, + "reward": 2.1789443492889404, + "reward_std": 0.21588024497032166, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.666461706161499, + "rewards/EvidenceHallucination/std": 0.431160569190979, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7213357090950012, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8789854049682617, + "rewards/VideoAccuracy/std": 0.44793781638145447, + "step": 834, + "train_speed(iter/s)": 0.114428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1016.0, + "completions/mean_length": 519.5952758789062, + "completions/min_length": 277.0, + "entropy/max": 1.1015625, + "entropy/mean": 0.4375, + "entropy/min": 0.12451171875, + "epoch": 0.835, + "grad_norm": 1.2486714801507632, + "kl": 0.2470703125, + "learning_rate": 1.3397459621556128e-07, + "loss": 0.0025551673024892807, + "memory(GiB)": 137.36, + "reward": 1.9654521942138672, + "reward_std": 0.1467912495136261, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5538205504417419, + "rewards/EvidenceHallucination/std": 0.4164559543132782, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 2.4175124168395996, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.7594498991966248, + "rewards/VideoAccuracy/std": 0.5330292582511902, + "step": 835, + "train_speed(iter/s)": 0.113663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 696.0, + "completions/mean_length": 505.5476379394531, + "completions/min_length": 360.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.4140625, + "entropy/min": 0.2890625, + "epoch": 0.836, + "grad_norm": 1.3787251891598633, + "kl": 0.263671875, + "learning_rate": 1.3239229635525073e-07, + "loss": 0.0026535876095294952, + "memory(GiB)": 137.36, + "reward": 1.7774924039840698, + "reward_std": 0.31256455183029175, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5785791873931885, + "rewards/EvidenceHallucination/std": 0.46408405900001526, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 0.8742011189460754, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.661776602268219, + "rewards/VideoAccuracy/std": 0.43136221170425415, + "step": 836, + "train_speed(iter/s)": 0.112896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 753.0, + "completions/mean_length": 440.5476379394531, + "completions/min_length": 328.0, + "entropy/max": 0.578125, + "entropy/mean": 0.40625, + "entropy/min": 0.28125, + "epoch": 0.837, + "grad_norm": 1.327097986710748, + "kl": 0.283203125, + "learning_rate": 1.3081873329488392e-07, + "loss": 0.002853620797395706, + "memory(GiB)": 137.36, + "reward": 2.0167486667633057, + "reward_std": 0.39193081855773926, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6177124977111816, + "rewards/EvidenceHallucination/std": 0.4071316123008728, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.9122345447540283, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.7932060956954956, + "rewards/VideoAccuracy/std": 0.4522143006324768, + "step": 837, + "train_speed(iter/s)": 0.11227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 716.0, + "completions/mean_length": 502.76190185546875, + "completions/min_length": 352.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.33203125, + "entropy/min": 0.08447265625, + "epoch": 0.838, + "grad_norm": 1.1545769651299362, + "kl": 0.2373046875, + "learning_rate": 1.2925392288022296e-07, + "loss": 0.0024269253481179476, + "memory(GiB)": 137.36, + "reward": 2.115060329437256, + "reward_std": 0.2253529280424118, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6825776100158691, + "rewards/EvidenceHallucination/std": 0.3816542327404022, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.0777013301849365, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8452115654945374, + "rewards/VideoAccuracy/std": 0.4711548089981079, + "step": 838, + "train_speed(iter/s)": 0.11117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1786.0, + "completions/mean_length": 523.6190795898438, + "completions/min_length": 269.0, + "entropy/max": 0.9765625, + "entropy/mean": 0.435546875, + "entropy/min": 0.2060546875, + "epoch": 0.839, + "grad_norm": 1.0796540588414187, + "kl": 0.271484375, + "learning_rate": 1.2769788086889132e-07, + "loss": 0.0028046760708093643, + "memory(GiB)": 137.36, + "reward": 1.6544684171676636, + "reward_std": 0.13489244878292084, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.49136263132095337, + "rewards/EvidenceHallucination/std": 0.43816834688186646, + "rewards/Evidence_Num_Record/mean": 5.142857074737549, + "rewards/Evidence_Num_Record/std": 3.6597445011138916, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5228625535964966, + "rewards/VideoAccuracy/std": 0.47375646233558655, + "step": 839, + "train_speed(iter/s)": 0.110558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 607.0, + "completions/mean_length": 402.16668701171875, + "completions/min_length": 241.0, + "entropy/max": 0.76953125, + "entropy/mean": 0.416015625, + "entropy/min": 0.271484375, + "epoch": 0.84, + "grad_norm": 1.3338339500336278, + "kl": 0.291015625, + "learning_rate": 1.2615062293021506e-07, + "loss": 0.0029495495837181807, + "memory(GiB)": 137.36, + "reward": 1.7845600843429565, + "reward_std": 0.11220282316207886, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.528125524520874, + "rewards/EvidenceHallucination/std": 0.44493892788887024, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.8025076985359192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6456018090248108, + "rewards/VideoAccuracy/std": 0.5069880485534668, + "step": 840, + "train_speed(iter/s)": 0.109998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 931.0, + "completions/mean_length": 549.1190795898438, + "completions/min_length": 354.0, + "entropy/max": 0.490234375, + "entropy/mean": 0.275390625, + "entropy/min": 0.1259765625, + "epoch": 0.841, + "grad_norm": 1.0705962781580627, + "kl": 0.2158203125, + "learning_rate": 1.2461216464506452e-07, + "loss": 0.002181172836571932, + "memory(GiB)": 137.36, + "reward": 2.2544729709625244, + "reward_std": 0.1595495343208313, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5502666234970093, + "rewards/EvidenceHallucination/std": 0.427486389875412, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7213357090950012, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9444196224212646, + "rewards/VideoAccuracy/std": 0.4263104200363159, + "step": 841, + "train_speed(iter/s)": 0.109322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 805.0, + "completions/mean_length": 478.19049072265625, + "completions/min_length": 316.0, + "entropy/max": 1.1875, + "entropy/mean": 0.5, + "entropy/min": 0.302734375, + "epoch": 0.842, + "grad_norm": 1.3309944067739075, + "kl": 0.275390625, + "learning_rate": 1.230825215056971e-07, + "loss": 0.0028127585537731647, + "memory(GiB)": 137.36, + "reward": 1.7678550481796265, + "reward_std": 0.2609485387802124, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5329591035842896, + "rewards/EvidenceHallucination/std": 0.45397794246673584, + "rewards/Evidence_Num_Record/mean": 4.523809432983398, + "rewards/Evidence_Num_Record/std": 1.3477731943130493, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.599358320236206, + "rewards/VideoAccuracy/std": 0.42538169026374817, + "step": 842, + "train_speed(iter/s)": 0.108668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 725.0, + "completions/mean_length": 440.73809814453125, + "completions/min_length": 347.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.412109375, + "entropy/min": 0.2333984375, + "epoch": 0.843, + "grad_norm": 1.3374249909047569, + "kl": 0.283203125, + "learning_rate": 1.2156170891560258e-07, + "loss": 0.002839302644133568, + "memory(GiB)": 137.36, + "reward": 1.734251856803894, + "reward_std": 0.4089145064353943, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5525057911872864, + "rewards/EvidenceHallucination/std": 0.4349856674671173, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 1.2650946378707886, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6237506866455078, + "rewards/VideoAccuracy/std": 0.4713672697544098, + "step": 843, + "train_speed(iter/s)": 0.10802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 906.0, + "completions/mean_length": 569.7142944335938, + "completions/min_length": 368.0, + "entropy/max": 0.6953125, + "entropy/mean": 0.330078125, + "entropy/min": 0.1396484375, + "epoch": 0.844, + "grad_norm": 1.1355950308818543, + "kl": 0.2314453125, + "learning_rate": 1.2004974218934695e-07, + "loss": 0.0023405367974191904, + "memory(GiB)": 137.36, + "reward": 1.9341176748275757, + "reward_std": 0.10934612154960632, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3932000696659088, + "rewards/EvidenceHallucination/std": 0.4373791217803955, + "rewards/Evidence_Num_Record/mean": 4.547619342803955, + "rewards/Evidence_Num_Record/std": 1.253334641456604, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6888108253479004, + "rewards/VideoAccuracy/std": 0.4891456365585327, + "step": 844, + "train_speed(iter/s)": 0.107671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 824.0, + "completions/mean_length": 487.7857360839844, + "completions/min_length": 244.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.384765625, + "entropy/min": 0.140625, + "epoch": 0.845, + "grad_norm": 0.9592076721527633, + "kl": 0.25390625, + "learning_rate": 1.1854663655241804e-07, + "loss": 0.0025939876213669777, + "memory(GiB)": 137.36, + "reward": 1.7634539604187012, + "reward_std": 0.1565001904964447, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3028511703014374, + "rewards/EvidenceHallucination/std": 0.4015829861164093, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.1974129676818848, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.6076455116271973, + "rewards/VideoAccuracy/std": 0.5249733328819275, + "step": 845, + "train_speed(iter/s)": 0.107127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 884.0, + "completions/mean_length": 471.3333435058594, + "completions/min_length": 274.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.44140625, + "entropy/min": 0.279296875, + "epoch": 0.846, + "grad_norm": 1.2116800723488819, + "kl": 0.26953125, + "learning_rate": 1.1705240714107301e-07, + "loss": 0.0027307234704494476, + "memory(GiB)": 137.36, + "reward": 1.3947242498397827, + "reward_std": 0.18065707385540009, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.23713630437850952, + "rewards/EvidenceHallucination/std": 0.41608914732933044, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.9422956109046936, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.2901540696620941, + "rewards/VideoAccuracy/std": 0.3921188414096832, + "step": 846, + "train_speed(iter/s)": 0.10601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 644.0, + "completions/mean_length": 454.5476379394531, + "completions/min_length": 320.0, + "entropy/max": 0.625, + "entropy/mean": 0.41796875, + "entropy/min": 0.2099609375, + "epoch": 0.847, + "grad_norm": 1.215591605280868, + "kl": 0.283203125, + "learning_rate": 1.1556706900218572e-07, + "loss": 0.002849389798939228, + "memory(GiB)": 137.36, + "reward": 2.086452007293701, + "reward_std": 0.2277895212173462, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6672782301902771, + "rewards/EvidenceHallucination/std": 0.43294113874435425, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.1435829401016235, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.862520158290863, + "rewards/VideoAccuracy/std": 0.5838239789009094, + "step": 847, + "train_speed(iter/s)": 0.10549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1009.0, + "completions/mean_length": 611.0714111328125, + "completions/min_length": 381.0, + "entropy/max": 0.6796875, + "entropy/mean": 0.306640625, + "entropy/min": 0.166015625, + "epoch": 0.848, + "grad_norm": 0.6460856343573428, + "kl": 0.2099609375, + "learning_rate": 1.140906370930944e-07, + "loss": 0.0021102060563862324, + "memory(GiB)": 137.36, + "reward": 1.7929471731185913, + "reward_std": 0.03765248879790306, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4066600501537323, + "rewards/EvidenceHallucination/std": 0.4503607451915741, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.2570359706878662, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5782817602157593, + "rewards/VideoAccuracy/std": 0.5222153067588806, + "step": 848, + "train_speed(iter/s)": 0.104894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 692.0, + "completions/mean_length": 470.2857360839844, + "completions/min_length": 333.0, + "entropy/max": 1.671875, + "entropy/mean": 0.470703125, + "entropy/min": 0.2421875, + "epoch": 0.849, + "grad_norm": 1.1901820052487082, + "kl": 0.267578125, + "learning_rate": 1.1262312628145209e-07, + "loss": 0.002717760857194662, + "memory(GiB)": 137.36, + "reward": 1.6252732276916504, + "reward_std": 0.2671740651130676, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4483039081096649, + "rewards/EvidenceHallucination/std": 0.47909459471702576, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.188407063484192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.46894586086273193, + "rewards/VideoAccuracy/std": 0.4558504819869995, + "step": 849, + "train_speed(iter/s)": 0.104379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 691.0, + "completions/mean_length": 454.3809509277344, + "completions/min_length": 299.0, + "entropy/max": 0.65234375, + "entropy/mean": 0.42578125, + "entropy/min": 0.2890625, + "epoch": 0.85, + "grad_norm": 1.2423254127926724, + "kl": 0.267578125, + "learning_rate": 1.1116455134507663e-07, + "loss": 0.002680886536836624, + "memory(GiB)": 137.36, + "reward": 1.6951377391815186, + "reward_std": 0.19957676529884338, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4258413016796112, + "rewards/EvidenceHallucination/std": 0.45843377709388733, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.8530195355415344, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5480645895004272, + "rewards/VideoAccuracy/std": 0.5059059262275696, + "step": 850, + "train_speed(iter/s)": 0.103933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 753.0, + "completions/mean_length": 511.4761962890625, + "completions/min_length": 355.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.3203125, + "entropy/min": 0.107421875, + "epoch": 0.851, + "grad_norm": 1.0218613332915403, + "kl": 0.2236328125, + "learning_rate": 1.0971492697180096e-07, + "loss": 0.002279686275869608, + "memory(GiB)": 137.36, + "reward": 2.298257350921631, + "reward_std": 0.2220420241355896, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6890178918838501, + "rewards/EvidenceHallucination/std": 0.37429994344711304, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.9830147624015808, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.969977617263794, + "rewards/VideoAccuracy/std": 0.38433244824409485, + "step": 851, + "train_speed(iter/s)": 0.103381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 874.0, + "completions/mean_length": 491.3333435058594, + "completions/min_length": 245.0, + "entropy/max": 1.2109375, + "entropy/mean": 0.47265625, + "entropy/min": 0.19921875, + "epoch": 0.852, + "grad_norm": 1.2509115609704713, + "kl": 0.275390625, + "learning_rate": 1.0827426775932657e-07, + "loss": 0.0027865557931363583, + "memory(GiB)": 137.36, + "reward": 1.699042797088623, + "reward_std": 0.28074491024017334, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5076565742492676, + "rewards/EvidenceHallucination/std": 0.47236886620521545, + "rewards/Evidence_Num_Record/mean": 4.761904716491699, + "rewards/Evidence_Num_Record/std": 1.2259297370910645, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5356067419052124, + "rewards/VideoAccuracy/std": 0.46379873156547546, + "step": 852, + "train_speed(iter/s)": 0.102783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 602.0, + "completions/mean_length": 429.5, + "completions/min_length": 307.0, + "entropy/max": 0.89453125, + "entropy/mean": 0.48046875, + "entropy/min": 0.283203125, + "epoch": 0.853, + "grad_norm": 1.368874538721325, + "kl": 0.275390625, + "learning_rate": 1.0684258821507618e-07, + "loss": 0.0027739896904677153, + "memory(GiB)": 137.36, + "reward": 1.7046847343444824, + "reward_std": 0.44494467973709106, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5178847908973694, + "rewards/EvidenceHallucination/std": 0.42951691150665283, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.5961549282073975, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6011077165603638, + "rewards/VideoAccuracy/std": 0.4659145176410675, + "step": 853, + "train_speed(iter/s)": 0.102279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 690.0, + "completions/mean_length": 457.5476379394531, + "completions/min_length": 333.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.330078125, + "entropy/min": 0.1416015625, + "epoch": 0.854, + "grad_norm": 1.151241175784308, + "kl": 0.271484375, + "learning_rate": 1.0541990275604628e-07, + "loss": 0.0027510307263582945, + "memory(GiB)": 137.36, + "reward": 2.28006911277771, + "reward_std": 0.09352787584066391, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7334737181663513, + "rewards/EvidenceHallucination/std": 0.3510028123855591, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.9833102226257324, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9667075276374817, + "rewards/VideoAccuracy/std": 0.4173412024974823, + "step": 854, + "train_speed(iter/s)": 0.101843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1199.0, + "completions/mean_length": 488.90478515625, + "completions/min_length": 327.0, + "entropy/max": 1.46875, + "entropy/mean": 0.396484375, + "entropy/min": 0.1494140625, + "epoch": 0.855, + "grad_norm": 1.136655172700135, + "kl": 0.25390625, + "learning_rate": 1.0400622570866425e-07, + "loss": 0.0026231100782752037, + "memory(GiB)": 137.36, + "reward": 1.914324164390564, + "reward_std": 0.13127605617046356, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5789222121238708, + "rewards/EvidenceHallucination/std": 0.427633672952652, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.7598239183425903, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.6985397934913635, + "rewards/VideoAccuracy/std": 0.4905802309513092, + "step": 855, + "train_speed(iter/s)": 0.101345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 714.0, + "completions/mean_length": 488.5238037109375, + "completions/min_length": 343.0, + "entropy/max": 0.72265625, + "entropy/mean": 0.419921875, + "entropy/min": 0.265625, + "epoch": 0.856, + "grad_norm": 1.2209167269679986, + "kl": 0.26953125, + "learning_rate": 1.0260157130864177e-07, + "loss": 0.0027050748467445374, + "memory(GiB)": 137.36, + "reward": 1.7837170362472534, + "reward_std": 0.2790336608886719, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.566267192363739, + "rewards/EvidenceHallucination/std": 0.4373977482318878, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.2061110734939575, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.6418920159339905, + "rewards/VideoAccuracy/std": 0.429427832365036, + "step": 856, + "train_speed(iter/s)": 0.100324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 612.0, + "completions/mean_length": 461.66668701171875, + "completions/min_length": 321.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.400390625, + "entropy/min": 0.267578125, + "epoch": 0.857, + "grad_norm": 1.1477013837218386, + "kl": 0.28125, + "learning_rate": 1.0120595370083318e-07, + "loss": 0.0028410381637513638, + "memory(GiB)": 137.36, + "reward": 1.9754301309585571, + "reward_std": 0.0912606343626976, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6005602478981018, + "rewards/EvidenceHallucination/std": 0.45697399973869324, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 0.8781778216362, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.7553179860115051, + "rewards/VideoAccuracy/std": 0.5490778684616089, + "step": 857, + "train_speed(iter/s)": 0.099872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 842.0, + "completions/mean_length": 579.0714111328125, + "completions/min_length": 369.0, + "entropy/max": 0.734375, + "entropy/mean": 0.31640625, + "entropy/min": 0.1357421875, + "epoch": 0.858, + "grad_norm": 0.6323285661858963, + "kl": 0.21875, + "learning_rate": 9.981938693909219e-08, + "loss": 0.002220054157078266, + "memory(GiB)": 137.36, + "reward": 1.7847347259521484, + "reward_std": 0.15355658531188965, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.34096771478652954, + "rewards/EvidenceHallucination/std": 0.44314488768577576, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 0.9258201122283936, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5832078456878662, + "rewards/VideoAccuracy/std": 0.5091155171394348, + "step": 858, + "train_speed(iter/s)": 0.099318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 817.0, + "completions/mean_length": 493.8095397949219, + "completions/min_length": 312.0, + "entropy/max": 1.015625, + "entropy/mean": 0.4921875, + "entropy/min": 0.314453125, + "epoch": 0.859, + "grad_norm": 1.250741159341449, + "kl": 0.265625, + "learning_rate": 9.844188498613115e-08, + "loss": 0.0026930745225399733, + "memory(GiB)": 137.36, + "reward": 1.8230197429656982, + "reward_std": 0.07269556820392609, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5855136513710022, + "rewards/EvidenceHallucination/std": 0.45245108008384705, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.2287685871124268, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500064849853516, + "rewards/VideoAccuracy/mean": 0.6535361409187317, + "rewards/VideoAccuracy/std": 0.39689022302627563, + "step": 859, + "train_speed(iter/s)": 0.098873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 644.0, + "completions/mean_length": 456.0476379394531, + "completions/min_length": 334.0, + "entropy/max": 0.49609375, + "entropy/mean": 0.40234375, + "entropy/min": 0.294921875, + "epoch": 0.86, + "grad_norm": 1.0915671993006, + "kl": 0.283203125, + "learning_rate": 9.707346171337893e-08, + "loss": 0.0028428146615624428, + "memory(GiB)": 137.36, + "reward": 1.299574851989746, + "reward_std": 0.2713920772075653, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.20511110126972198, + "rewards/EvidenceHallucination/std": 0.3983006775379181, + "rewards/Evidence_Num_Record/mean": 3.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7066960334777832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.22521927952766418, + "rewards/VideoAccuracy/std": 0.4099663496017456, + "step": 860, + "train_speed(iter/s)": 0.098626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 873.0, + "completions/mean_length": 544.5, + "completions/min_length": 354.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.29296875, + "entropy/min": 0.11572265625, + "epoch": 0.861, + "grad_norm": 1.0238063997560405, + "kl": 0.2197265625, + "learning_rate": 9.57141309008428e-08, + "loss": 0.0022320393472909927, + "memory(GiB)": 137.36, + "reward": 2.289512872695923, + "reward_std": 0.2071124017238617, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6585298180580139, + "rewards/EvidenceHallucination/std": 0.41074296832084656, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.6115421056747437, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430335700511932, + "rewards/VideoAccuracy/mean": 0.9625687599182129, + "rewards/VideoAccuracy/std": 0.27861785888671875, + "step": 861, + "train_speed(iter/s)": 0.098147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 818.0, + "completions/mean_length": 493.1428527832031, + "completions/min_length": 363.0, + "entropy/max": 1.09375, + "entropy/mean": 0.494140625, + "entropy/min": 0.2890625, + "epoch": 0.862, + "grad_norm": 1.360844426339108, + "kl": 0.271484375, + "learning_rate": 9.43639062369691e-08, + "loss": 0.0027274617459625006, + "memory(GiB)": 137.36, + "reward": 1.956334114074707, + "reward_std": 0.2271970808506012, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7514772415161133, + "rewards/EvidenceHallucination/std": 0.3589233458042145, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.0581248998641968, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.261904776096344, + "rewards/HonestTime/std": 0.44500061869621277, + "rewards/VideoAccuracy/mean": 0.7536576986312866, + "rewards/VideoAccuracy/std": 0.3471892178058624, + "step": 862, + "train_speed(iter/s)": 0.0977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 556.0, + "completions/mean_length": 417.5714416503906, + "completions/min_length": 311.0, + "entropy/max": 0.609375, + "entropy/mean": 0.419921875, + "entropy/min": 0.25390625, + "epoch": 0.863, + "grad_norm": 1.34293589689136, + "kl": 0.298828125, + "learning_rate": 9.302280131850537e-08, + "loss": 0.0029917543288320303, + "memory(GiB)": 137.36, + "reward": 1.5938074588775635, + "reward_std": 0.3932831287384033, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4868219494819641, + "rewards/EvidenceHallucination/std": 0.4731104373931885, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.7066960334777832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.496442973613739, + "rewards/VideoAccuracy/std": 0.48178577423095703, + "step": 863, + "train_speed(iter/s)": 0.097267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 675.0, + "completions/mean_length": 466.2857360839844, + "completions/min_length": 337.0, + "entropy/max": 0.5859375, + "entropy/mean": 0.35546875, + "entropy/min": 0.2197265625, + "epoch": 0.864, + "grad_norm": 1.3350307015516478, + "kl": 0.26953125, + "learning_rate": 9.169082965036279e-08, + "loss": 0.002724633552134037, + "memory(GiB)": 137.36, + "reward": 1.9755473136901855, + "reward_std": 0.145391583442688, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.46336421370506287, + "rewards/EvidenceHallucination/std": 0.44457826018333435, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.9865530729293823, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7162078022956848, + "rewards/VideoAccuracy/std": 0.46055346727371216, + "step": 864, + "train_speed(iter/s)": 0.096809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1012.0, + "completions/mean_length": 562.6428833007812, + "completions/min_length": 296.0, + "entropy/max": 1.7890625, + "entropy/mean": 0.458984375, + "entropy/min": 0.1474609375, + "epoch": 0.865, + "grad_norm": 1.0681975181453227, + "kl": 0.2265625, + "learning_rate": 9.036800464548156e-08, + "loss": 0.0022891086991876364, + "memory(GiB)": 137.36, + "reward": 1.952393651008606, + "reward_std": 0.16332533955574036, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5214911103248596, + "rewards/EvidenceHallucination/std": 0.40741944313049316, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.2869396209716797, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.7480955719947815, + "rewards/VideoAccuracy/std": 0.4404449760913849, + "step": 865, + "train_speed(iter/s)": 0.096226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 667.0, + "completions/mean_length": 486.3809509277344, + "completions/min_length": 357.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.453125, + "entropy/min": 0.318359375, + "epoch": 0.866, + "grad_norm": 1.2752987021411784, + "kl": 0.267578125, + "learning_rate": 8.905433962469488e-08, + "loss": 0.002698513213545084, + "memory(GiB)": 137.36, + "reward": 1.9704946279525757, + "reward_std": 0.11606656759977341, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7517710328102112, + "rewards/EvidenceHallucination/std": 0.3492864966392517, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 0.9323829412460327, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.786807119846344, + "rewards/VideoAccuracy/std": 0.3002590239048004, + "step": 866, + "train_speed(iter/s)": 0.095796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 635.0, + "completions/mean_length": 447.0238037109375, + "completions/min_length": 265.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.41015625, + "entropy/min": 0.296875, + "epoch": 0.867, + "grad_norm": 1.274235978961333, + "kl": 0.27734375, + "learning_rate": 8.774984781659468e-08, + "loss": 0.002792106010019779, + "memory(GiB)": 137.36, + "reward": 1.7536741495132446, + "reward_std": 0.22508135437965393, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3733477294445038, + "rewards/EvidenceHallucination/std": 0.42099037766456604, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.9055256247520447, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.5837664008140564, + "rewards/VideoAccuracy/std": 0.5010433197021484, + "step": 867, + "train_speed(iter/s)": 0.095443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1025.0, + "completions/mean_length": 591.7857055664062, + "completions/min_length": 335.0, + "entropy/max": 0.5625, + "entropy/mean": 0.318359375, + "entropy/min": 0.13671875, + "epoch": 0.868, + "grad_norm": 0.6722452828309774, + "kl": 0.2080078125, + "learning_rate": 8.645454235739902e-08, + "loss": 0.002102708211168647, + "memory(GiB)": 137.41, + "reward": 1.9160841703414917, + "reward_std": 0.08268817514181137, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5511731505393982, + "rewards/EvidenceHallucination/std": 0.42706412076950073, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.1323559284210205, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6725161075592041, + "rewards/VideoAccuracy/std": 0.5182072520256042, + "step": 868, + "train_speed(iter/s)": 0.094723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 896.0, + "completions/mean_length": 500.8571472167969, + "completions/min_length": 334.0, + "entropy/max": 1.3359375, + "entropy/mean": 0.49609375, + "entropy/min": 0.287109375, + "epoch": 0.869, + "grad_norm": 0.789281956188977, + "kl": 0.275390625, + "learning_rate": 8.516843629081982e-08, + "loss": 0.002797728404402733, + "memory(GiB)": 137.41, + "reward": 1.5103896856307983, + "reward_std": 0.07082901895046234, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3638714551925659, + "rewards/EvidenceHallucination/std": 0.451041042804718, + "rewards/Evidence_Num_Record/mean": 4.761904716491699, + "rewards/Evidence_Num_Record/std": 1.7503941059112549, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.3709486722946167, + "rewards/VideoAccuracy/std": 0.40454044938087463, + "step": 869, + "train_speed(iter/s)": 0.094391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/mean_length": 445.73809814453125, + "completions/min_length": 307.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.42578125, + "entropy/min": 0.30078125, + "epoch": 0.87, + "grad_norm": 1.1826294572355491, + "kl": 0.29296875, + "learning_rate": 8.38915425679304e-08, + "loss": 0.0029431432485580444, + "memory(GiB)": 137.41, + "reward": 1.4018317461013794, + "reward_std": 0.2824872136116028, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3030472993850708, + "rewards/EvidenceHallucination/std": 0.4373262822628021, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.8742011785507202, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3078889548778534, + "rewards/VideoAccuracy/std": 0.38509246706962585, + "step": 870, + "train_speed(iter/s)": 0.094011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 792.0, + "completions/mean_length": 540.357177734375, + "completions/min_length": 365.0, + "entropy/max": 0.47265625, + "entropy/mean": 0.27734375, + "entropy/min": 0.13671875, + "epoch": 0.871, + "grad_norm": 1.0241036917869137, + "kl": 0.2216796875, + "learning_rate": 8.262387404703653e-08, + "loss": 0.002246022457256913, + "memory(GiB)": 137.41, + "reward": 2.128763437271118, + "reward_std": 0.10026715695858002, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5795870423316956, + "rewards/EvidenceHallucination/std": 0.4270126521587372, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.0017406940460205, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8176080584526062, + "rewards/VideoAccuracy/std": 0.3870820999145508, + "step": 871, + "train_speed(iter/s)": 0.093529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 893.0, + "completions/mean_length": 547.6428833007812, + "completions/min_length": 356.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.484375, + "entropy/min": 0.240234375, + "epoch": 0.872, + "grad_norm": 1.2728577199743507, + "kl": 0.2431640625, + "learning_rate": 8.136544349354668e-08, + "loss": 0.0024901535362005234, + "memory(GiB)": 137.41, + "reward": 1.6112569570541382, + "reward_std": 0.3500445783138275, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4395892918109894, + "rewards/EvidenceHallucination/std": 0.4364795684814453, + "rewards/Evidence_Num_Record/mean": 5.0, + "rewards/Evidence_Num_Record/std": 1.6527878046035767, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722997188568115, + "rewards/VideoAccuracy/mean": 0.4661962389945984, + "rewards/VideoAccuracy/std": 0.43624967336654663, + "step": 872, + "train_speed(iter/s)": 0.093007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 593.0, + "completions/mean_length": 406.73809814453125, + "completions/min_length": 250.0, + "entropy/max": 0.734375, + "entropy/mean": 0.484375, + "entropy/min": 0.294921875, + "epoch": 0.873, + "grad_norm": 1.1136326946388309, + "kl": 0.291015625, + "learning_rate": 8.01162635798418e-08, + "loss": 0.00293728057295084, + "memory(GiB)": 137.41, + "reward": 1.378351092338562, + "reward_std": 0.18517830967903137, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.28729623556137085, + "rewards/EvidenceHallucination/std": 0.4380473494529724, + "rewards/Evidence_Num_Record/mean": 3.3809523582458496, + "rewards/Evidence_Num_Record/std": 0.8540400862693787, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.3208918273448944, + "rewards/VideoAccuracy/std": 0.43048128485679626, + "step": 873, + "train_speed(iter/s)": 0.092613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1011.0, + "completions/mean_length": 487.452392578125, + "completions/min_length": 254.0, + "entropy/max": 0.640625, + "entropy/mean": 0.3671875, + "entropy/min": 0.1533203125, + "epoch": 0.874, + "grad_norm": 1.138857625156919, + "kl": 0.263671875, + "learning_rate": 7.887634688515e-08, + "loss": 0.0026560970582067966, + "memory(GiB)": 137.41, + "reward": 1.9813612699508667, + "reward_std": 0.15701758861541748, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4765368103981018, + "rewards/EvidenceHallucination/std": 0.48360008001327515, + "rewards/Evidence_Num_Record/mean": 3.452381134033203, + "rewards/Evidence_Num_Record/std": 0.66999751329422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.7241489887237549, + "rewards/VideoAccuracy/std": 0.41951704025268555, + "step": 874, + "train_speed(iter/s)": 0.092173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1131.0, + "completions/mean_length": 573.0238037109375, + "completions/min_length": 268.0, + "entropy/max": 0.73046875, + "entropy/mean": 0.376953125, + "entropy/min": 0.1328125, + "epoch": 0.875, + "grad_norm": 1.2268666216419313, + "kl": 0.203125, + "learning_rate": 7.764570589541875e-08, + "loss": 0.0021050162613391876, + "memory(GiB)": 137.41, + "reward": 1.972877860069275, + "reward_std": 0.15945619344711304, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6702647805213928, + "rewards/EvidenceHallucination/std": 0.4148913025856018, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.555030107498169, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.7388249635696411, + "rewards/VideoAccuracy/std": 0.34591543674468994, + "step": 875, + "train_speed(iter/s)": 0.091599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 805.0, + "completions/mean_length": 473.3809509277344, + "completions/min_length": 363.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.435546875, + "entropy/min": 0.232421875, + "epoch": 0.876, + "grad_norm": 1.2740935503447899, + "kl": 0.279296875, + "learning_rate": 7.642435300318906e-08, + "loss": 0.002823675749823451, + "memory(GiB)": 137.41, + "reward": 1.4506222009658813, + "reward_std": 0.34449562430381775, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.32069632411003113, + "rewards/EvidenceHallucination/std": 0.4599875807762146, + "rewards/Evidence_Num_Record/mean": 3.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7726449966430664, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3531496524810791, + "rewards/VideoAccuracy/std": 0.42197105288505554, + "step": 876, + "train_speed(iter/s)": 0.091264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 625.0, + "completions/mean_length": 461.73809814453125, + "completions/min_length": 349.0, + "entropy/max": 0.578125, + "entropy/mean": 0.412109375, + "entropy/min": 0.29296875, + "epoch": 0.877, + "grad_norm": 1.1481544117424332, + "kl": 0.287109375, + "learning_rate": 7.521230050747085e-08, + "loss": 0.002876377198845148, + "memory(GiB)": 137.41, + "reward": 1.7416545152664185, + "reward_std": 0.2787608802318573, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3635919392108917, + "rewards/EvidenceHallucination/std": 0.47230517864227295, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.7589956521987915, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.568936288356781, + "rewards/VideoAccuracy/std": 0.5389463901519775, + "step": 877, + "train_speed(iter/s)": 0.090939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1051.0, + "completions/mean_length": 621.452392578125, + "completions/min_length": 351.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.345703125, + "entropy/min": 0.125, + "epoch": 0.878, + "grad_norm": 0.9383968788323271, + "kl": 0.1962890625, + "learning_rate": 7.400956061361974e-08, + "loss": 0.002009030431509018, + "memory(GiB)": 137.41, + "reward": 1.842140793800354, + "reward_std": 0.17816469073295593, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45321619510650635, + "rewards/EvidenceHallucination/std": 0.46109721064567566, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.0155584812164307, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.6181641817092896, + "rewards/VideoAccuracy/std": 0.47660982608795166, + "step": 878, + "train_speed(iter/s)": 0.09053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1120.0, + "completions/mean_length": 491.7857360839844, + "completions/min_length": 352.0, + "entropy/max": 0.78125, + "entropy/mean": 0.421875, + "entropy/min": 0.22265625, + "epoch": 0.879, + "grad_norm": 1.1225403470000348, + "kl": 0.2490234375, + "learning_rate": 7.281614543321269e-08, + "loss": 0.0025496752932667732, + "memory(GiB)": 137.41, + "reward": 1.6172040700912476, + "reward_std": 0.21314892172813416, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3995124399662018, + "rewards/EvidenceHallucination/std": 0.45061445236206055, + "rewards/Evidence_Num_Record/mean": 4.785714149475098, + "rewards/Evidence_Num_Record/std": 1.5227657556533813, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.47063493728637695, + "rewards/VideoAccuracy/std": 0.43577468395233154, + "step": 879, + "train_speed(iter/s)": 0.090139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 659.0, + "completions/mean_length": 468.19049072265625, + "completions/min_length": 347.0, + "entropy/max": 0.6640625, + "entropy/mean": 0.453125, + "entropy/min": 0.2421875, + "epoch": 0.88, + "grad_norm": 1.3583495100760876, + "kl": 0.28125, + "learning_rate": 7.163206698392742e-08, + "loss": 0.0028377859853208065, + "memory(GiB)": 137.41, + "reward": 1.78130304813385, + "reward_std": 0.3314023017883301, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.564808189868927, + "rewards/EvidenceHallucination/std": 0.47428396344184875, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.9236221313476562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6350079774856567, + "rewards/VideoAccuracy/std": 0.3911062777042389, + "step": 880, + "train_speed(iter/s)": 0.089636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 890.0, + "completions/mean_length": 610.2380981445312, + "completions/min_length": 385.0, + "entropy/max": 0.546875, + "entropy/mean": 0.248046875, + "entropy/min": 0.12255859375, + "epoch": 0.881, + "grad_norm": 0.9674632861169625, + "kl": 0.1982421875, + "learning_rate": 7.045733718942093e-08, + "loss": 0.001996932551264763, + "memory(GiB)": 137.41, + "reward": 2.190370798110962, + "reward_std": 0.09444907307624817, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5754888653755188, + "rewards/EvidenceHallucination/std": 0.42112043499946594, + "rewards/Evidence_Num_Record/mean": 4.333333492279053, + "rewards/Evidence_Num_Record/std": 0.9283257722854614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8752728700637817, + "rewards/VideoAccuracy/std": 0.47302642464637756, + "step": 881, + "train_speed(iter/s)": 0.089201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 969.0, + "completions/mean_length": 537.0238037109375, + "completions/min_length": 375.0, + "entropy/max": 1.0078125, + "entropy/mean": 0.462890625, + "entropy/min": 0.287109375, + "epoch": 0.882, + "grad_norm": 1.0925018062641088, + "kl": 0.2470703125, + "learning_rate": 6.929196787920898e-08, + "loss": 0.0025138126220554113, + "memory(GiB)": 137.41, + "reward": 1.5892995595932007, + "reward_std": 0.17472247779369354, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4005925953388214, + "rewards/EvidenceHallucination/std": 0.4204840660095215, + "rewards/Evidence_Num_Record/mean": 5.309524059295654, + "rewards/Evidence_Num_Record/std": 1.7598239183425903, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.44727635383605957, + "rewards/VideoAccuracy/std": 0.46331316232681274, + "step": 882, + "train_speed(iter/s)": 0.088791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 680.0, + "completions/mean_length": 441.5952453613281, + "completions/min_length": 271.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.474609375, + "entropy/min": 0.337890625, + "epoch": 0.883, + "grad_norm": 1.3718549355783753, + "kl": 0.283203125, + "learning_rate": 6.813597078854771e-08, + "loss": 0.0028338287957012653, + "memory(GiB)": 137.41, + "reward": 1.533994197845459, + "reward_std": 0.32422956824302673, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.40407299995422363, + "rewards/EvidenceHallucination/std": 0.4569137990474701, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.9606062173843384, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4531796872615814, + "rewards/VideoAccuracy/std": 0.4807124733924866, + "step": 883, + "train_speed(iter/s)": 0.088476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 761.0, + "completions/mean_length": 475.76190185546875, + "completions/min_length": 269.0, + "entropy/max": 0.54296875, + "entropy/mean": 0.349609375, + "entropy/min": 0.1494140625, + "epoch": 0.884, + "grad_norm": 1.1966819555119639, + "kl": 0.271484375, + "learning_rate": 6.698935755831491e-08, + "loss": 0.002740682801231742, + "memory(GiB)": 137.41, + "reward": 2.1745049953460693, + "reward_std": 0.1737610399723053, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5192246437072754, + "rewards/EvidenceHallucination/std": 0.4441063702106476, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.9422956705093384, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.9039933681488037, + "rewards/VideoAccuracy/std": 0.4034653306007385, + "step": 884, + "train_speed(iter/s)": 0.088056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/mean_length": 516.2380981445312, + "completions/min_length": 331.0, + "entropy/max": 0.921875, + "entropy/mean": 0.447265625, + "entropy/min": 0.1513671875, + "epoch": 0.885, + "grad_norm": 1.2399208440837544, + "kl": 0.2333984375, + "learning_rate": 6.585213973489334e-08, + "loss": 0.002357909455895424, + "memory(GiB)": 137.41, + "reward": 2.129850387573242, + "reward_std": 0.20503221452236176, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6250701546669006, + "rewards/EvidenceHallucination/std": 0.4147346615791321, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.3236435651779175, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.9048364162445068, + "rewards/VideoAccuracy/std": 0.3743656873703003, + "step": 885, + "train_speed(iter/s)": 0.087565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 770.0, + "completions/mean_length": 458.1190490722656, + "completions/min_length": 276.0, + "entropy/max": 0.58984375, + "entropy/mean": 0.416015625, + "entropy/min": 0.28125, + "epoch": 0.886, + "grad_norm": 1.5532292659103142, + "kl": 0.275390625, + "learning_rate": 6.47243287700534e-08, + "loss": 0.002769982907921076, + "memory(GiB)": 137.41, + "reward": 1.8790358304977417, + "reward_std": 0.3673816919326782, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6465736031532288, + "rewards/EvidenceHallucination/std": 0.41805994510650635, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 1.0373399257659912, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7163876891136169, + "rewards/VideoAccuracy/std": 0.3747093677520752, + "step": 886, + "train_speed(iter/s)": 0.087111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 462.9285888671875, + "completions/min_length": 252.0, + "entropy/max": 1.0078125, + "entropy/mean": 0.453125, + "entropy/min": 0.232421875, + "epoch": 0.887, + "grad_norm": 1.463237188359426, + "kl": 0.2890625, + "learning_rate": 6.36059360208394e-08, + "loss": 0.0029151299968361855, + "memory(GiB)": 137.41, + "reward": 2.2597317695617676, + "reward_std": 0.24623370170593262, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7909353375434875, + "rewards/EvidenceHallucination/std": 0.28417184948921204, + "rewards/Evidence_Num_Record/mean": 3.9285714626312256, + "rewards/Evidence_Num_Record/std": 1.134661316871643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 1.00154447555542, + "rewards/VideoAccuracy/std": 0.3549065887928009, + "step": 887, + "train_speed(iter/s)": 0.086794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 729.0, + "completions/mean_length": 519.1428833007812, + "completions/min_length": 365.0, + "entropy/max": 2.015625, + "entropy/mean": 0.5, + "entropy/min": 0.1474609375, + "epoch": 0.888, + "grad_norm": 0.9038730392780432, + "kl": 0.21484375, + "learning_rate": 6.249697274945376e-08, + "loss": 0.0021830867044627666, + "memory(GiB)": 137.41, + "reward": 2.1366453170776367, + "reward_std": 0.11492623388767242, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5206296443939209, + "rewards/EvidenceHallucination/std": 0.4495990574359894, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 1.187673807144165, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.8991861343383789, + "rewards/VideoAccuracy/std": 0.43332386016845703, + "step": 888, + "train_speed(iter/s)": 0.086372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 663.0, + "completions/mean_length": 459.21429443359375, + "completions/min_length": 336.0, + "entropy/max": 0.6015625, + "entropy/mean": 0.431640625, + "entropy/min": 0.251953125, + "epoch": 0.889, + "grad_norm": 1.2730359639274378, + "kl": 0.2734375, + "learning_rate": 6.139745012314424e-08, + "loss": 0.0027574487030506134, + "memory(GiB)": 137.41, + "reward": 1.5221045017242432, + "reward_std": 0.2713756859302521, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29796910285949707, + "rewards/EvidenceHallucination/std": 0.4305155873298645, + "rewards/Evidence_Num_Record/mean": 4.095238208770752, + "rewards/Evidence_Num_Record/std": 1.1220521926879883, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.40536782145500183, + "rewards/VideoAccuracy/std": 0.4380069673061371, + "step": 889, + "train_speed(iter/s)": 0.08605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 578.0, + "completions/mean_length": 459.0952453613281, + "completions/min_length": 315.0, + "entropy/max": 0.63671875, + "entropy/mean": 0.451171875, + "entropy/min": 0.28515625, + "epoch": 0.89, + "grad_norm": 1.322065101605198, + "kl": 0.287109375, + "learning_rate": 6.030737921409168e-08, + "loss": 0.0028813458047807217, + "memory(GiB)": 137.41, + "reward": 1.7936220169067383, + "reward_std": 0.32686707377433777, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5598907470703125, + "rewards/EvidenceHallucination/std": 0.47290104627609253, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.6270147562026978, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6483103632926941, + "rewards/VideoAccuracy/std": 0.5596532225608826, + "step": 890, + "train_speed(iter/s)": 0.085746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 854.0, + "completions/mean_length": 546.6190795898438, + "completions/min_length": 354.0, + "entropy/max": 0.53515625, + "entropy/mean": 0.2890625, + "entropy/min": 0.1650390625, + "epoch": 0.891, + "grad_norm": 1.110845425550562, + "kl": 0.2119140625, + "learning_rate": 5.922677099929785e-08, + "loss": 0.0021479499991983175, + "memory(GiB)": 137.41, + "reward": 2.1060032844543457, + "reward_std": 0.19177255034446716, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5316746234893799, + "rewards/EvidenceHallucination/std": 0.4688880145549774, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7083376049995422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.8044303059577942, + "rewards/VideoAccuracy/std": 0.47716856002807617, + "step": 891, + "train_speed(iter/s)": 0.085401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 656.0, + "completions/mean_length": 490.6190490722656, + "completions/min_length": 349.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.4765625, + "entropy/min": 0.248046875, + "epoch": 0.892, + "grad_norm": 1.217363528182602, + "kl": 0.271484375, + "learning_rate": 5.815563636047538e-08, + "loss": 0.0027382380794733763, + "memory(GiB)": 137.41, + "reward": 1.88263738155365, + "reward_std": 0.06644676625728607, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6405651569366455, + "rewards/EvidenceHallucination/std": 0.40935197472572327, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 0.9912509322166443, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.6926195621490479, + "rewards/VideoAccuracy/std": 0.3781779706478119, + "step": 892, + "train_speed(iter/s)": 0.085099 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 685.0, + "completions/mean_length": 404.0238037109375, + "completions/min_length": 284.0, + "entropy/max": 0.9375, + "entropy/mean": 0.44140625, + "entropy/min": 0.296875, + "epoch": 0.893, + "grad_norm": 1.3202198516584376, + "kl": 0.28515625, + "learning_rate": 5.709398608393834e-08, + "loss": 0.0028688169550150633, + "memory(GiB)": 137.41, + "reward": 1.6975760459899902, + "reward_std": 0.41498932242393494, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4748455286026001, + "rewards/EvidenceHallucination/std": 0.46483904123306274, + "rewards/Evidence_Num_Record/mean": 3.3333334922790527, + "rewards/Evidence_Num_Record/std": 0.845841109752655, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6026068925857544, + "rewards/VideoAccuracy/std": 0.44790560007095337, + "step": 893, + "train_speed(iter/s)": 0.084791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 721.0, + "completions/mean_length": 461.71429443359375, + "completions/min_length": 292.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.3515625, + "entropy/min": 0.19140625, + "epoch": 0.894, + "grad_norm": 1.077120693480762, + "kl": 0.259765625, + "learning_rate": 5.604183086049341e-08, + "loss": 0.0026304530911147594, + "memory(GiB)": 137.41, + "reward": 2.1575303077697754, + "reward_std": 0.07329066842794418, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5564228296279907, + "rewards/EvidenceHallucination/std": 0.43415695428848267, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 0.9385906457901001, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8795791864395142, + "rewards/VideoAccuracy/std": 0.6343855261802673, + "step": 894, + "train_speed(iter/s)": 0.084489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 732.0, + "completions/mean_length": 502.66668701171875, + "completions/min_length": 314.0, + "entropy/max": 1.4375, + "entropy/mean": 0.404296875, + "entropy/min": 0.1552734375, + "epoch": 0.895, + "grad_norm": 1.0951229106785407, + "kl": 0.2412109375, + "learning_rate": 5.499918128533154e-08, + "loss": 0.0024560079909861088, + "memory(GiB)": 137.41, + "reward": 1.926005244255066, + "reward_std": 0.12339666485786438, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5627981424331665, + "rewards/EvidenceHallucination/std": 0.4098535180091858, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.9358023405075073, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.7134456038475037, + "rewards/VideoAccuracy/std": 0.3909820020198822, + "step": 895, + "train_speed(iter/s)": 0.084102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 590.0, + "completions/mean_length": 455.73809814453125, + "completions/min_length": 345.0, + "entropy/max": 0.5625, + "entropy/mean": 0.404296875, + "entropy/min": 0.296875, + "epoch": 0.896, + "grad_norm": 1.2009173901435557, + "kl": 0.294921875, + "learning_rate": 5.39660478579228e-08, + "loss": 0.002986327512189746, + "memory(GiB)": 137.41, + "reward": 1.6633399724960327, + "reward_std": 0.07493476569652557, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5234918594360352, + "rewards/EvidenceHallucination/std": 0.48352640867233276, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.5808600187301636, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.5348320007324219, + "rewards/VideoAccuracy/std": 0.462007999420166, + "step": 896, + "train_speed(iter/s)": 0.083796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 630.0, + "completions/mean_length": 468.5714416503906, + "completions/min_length": 321.0, + "entropy/max": 0.69921875, + "entropy/mean": 0.4140625, + "entropy/min": 0.283203125, + "epoch": 0.897, + "grad_norm": 1.4394225019863833, + "kl": 0.2890625, + "learning_rate": 5.294244098190925e-08, + "loss": 0.0029065608978271484, + "memory(GiB)": 137.41, + "reward": 2.087952136993408, + "reward_std": 0.3857279419898987, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6915156841278076, + "rewards/EvidenceHallucination/std": 0.3970063030719757, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.7624308466911316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8496488928794861, + "rewards/VideoAccuracy/std": 0.465589314699173, + "step": 897, + "train_speed(iter/s)": 0.083513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 970.0, + "completions/mean_length": 542.6428833007812, + "completions/min_length": 407.0, + "entropy/max": 0.703125, + "entropy/mean": 0.294921875, + "entropy/min": 0.134765625, + "epoch": 0.898, + "grad_norm": 0.9412662227413006, + "kl": 0.2197265625, + "learning_rate": 5.192837096500058e-08, + "loss": 0.0022464762441813946, + "memory(GiB)": 137.41, + "reward": 1.9782623052597046, + "reward_std": 0.1456795185804367, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4908020794391632, + "rewards/EvidenceHallucination/std": 0.42891359329223633, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 1.445084571838379, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7467685341835022, + "rewards/VideoAccuracy/std": 0.4523145854473114, + "step": 898, + "train_speed(iter/s)": 0.083157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 808.0, + "completions/mean_length": 483.8095397949219, + "completions/min_length": 317.0, + "entropy/max": 0.8046875, + "entropy/mean": 0.44140625, + "entropy/min": 0.28515625, + "epoch": 0.899, + "grad_norm": 1.2302754346798352, + "kl": 0.263671875, + "learning_rate": 5.092384801887073e-08, + "loss": 0.002651178278028965, + "memory(GiB)": 137.41, + "reward": 1.5938938856124878, + "reward_std": 0.21898728609085083, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.41778501868247986, + "rewards/EvidenceHallucination/std": 0.4507313072681427, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.1160845756530762, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.45319414138793945, + "rewards/VideoAccuracy/std": 0.42926546931266785, + "step": 899, + "train_speed(iter/s)": 0.08286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 676.0, + "completions/mean_length": 478.23809814453125, + "completions/min_length": 337.0, + "entropy/max": 0.60546875, + "entropy/mean": 0.44921875, + "entropy/min": 0.302734375, + "epoch": 0.9, + "grad_norm": 1.3553010967243564, + "kl": 0.28125, + "learning_rate": 4.992888225905467e-08, + "loss": 0.002824255730956793, + "memory(GiB)": 137.41, + "reward": 1.6197789907455444, + "reward_std": 0.23516342043876648, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3931371569633484, + "rewards/EvidenceHallucination/std": 0.483131468296051, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.2699053287506104, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2380952388048172, + "rewards/HonestTime/std": 0.43108054995536804, + "rewards/VideoAccuracy/mean": 0.49353253841400146, + "rewards/VideoAccuracy/std": 0.48202645778656006, + "step": 900, + "train_speed(iter/s)": 0.082597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 904.0, + "completions/mean_length": 549.8333740234375, + "completions/min_length": 338.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.330078125, + "entropy/min": 0.1298828125, + "epoch": 0.901, + "grad_norm": 1.0452126125239998, + "kl": 0.2138671875, + "learning_rate": 4.8943483704846465e-08, + "loss": 0.002178219147026539, + "memory(GiB)": 137.41, + "reward": 2.160017251968384, + "reward_std": 0.20481643080711365, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6286323070526123, + "rewards/EvidenceHallucination/std": 0.39143964648246765, + "rewards/Evidence_Num_Record/mean": 3.6666667461395264, + "rewards/Evidence_Num_Record/std": 0.7543909549713135, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8342907428741455, + "rewards/VideoAccuracy/std": 0.4933658540248871, + "step": 901, + "train_speed(iter/s)": 0.081945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 721.0, + "completions/mean_length": 483.90478515625, + "completions/min_length": 358.0, + "entropy/max": 1.0703125, + "entropy/mean": 0.4765625, + "entropy/min": 0.287109375, + "epoch": 0.902, + "grad_norm": 1.2072911926562326, + "kl": 0.28515625, + "learning_rate": 4.796766227919857e-08, + "loss": 0.0028953691944479942, + "memory(GiB)": 137.41, + "reward": 1.8647540807724, + "reward_std": 0.21534258127212524, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6785789132118225, + "rewards/EvidenceHallucination/std": 0.4169284403324127, + "rewards/Evidence_Num_Record/mean": 4.857142925262451, + "rewards/Evidence_Num_Record/std": 1.704676866531372, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.6623717546463013, + "rewards/VideoAccuracy/std": 0.4006807208061218, + "step": 902, + "train_speed(iter/s)": 0.081629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 869.0, + "completions/mean_length": 456.9761962890625, + "completions/min_length": 260.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.431640625, + "entropy/min": 0.294921875, + "epoch": 0.903, + "grad_norm": 1.0096162813731613, + "kl": 0.255859375, + "learning_rate": 4.700142780862204e-08, + "loss": 0.0025641187094151974, + "memory(GiB)": 137.41, + "reward": 1.1655939817428589, + "reward_std": 0.25843915343284607, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.1373104751110077, + "rewards/EvidenceHallucination/std": 0.3412381708621979, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 1.6115148067474365, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.13813185691833496, + "rewards/VideoAccuracy/std": 0.3316902816295624, + "step": 903, + "train_speed(iter/s)": 0.081432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1014.0, + "completions/mean_length": 514.5952758789062, + "completions/min_length": 322.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.326171875, + "entropy/min": 0.1279296875, + "epoch": 0.904, + "grad_norm": 1.1391754145138482, + "kl": 0.26171875, + "learning_rate": 4.6044790023087364e-08, + "loss": 0.0026426189579069614, + "memory(GiB)": 137.41, + "reward": 2.224207878112793, + "reward_std": 0.1719508022069931, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6544135808944702, + "rewards/EvidenceHallucination/std": 0.4259692430496216, + "rewards/Evidence_Num_Record/mean": 3.8809523582458496, + "rewards/Evidence_Num_Record/std": 0.9160460829734802, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.785714328289032, + "rewards/HonestTime/std": 0.41529974341392517, + "rewards/VideoAccuracy/mean": 0.9361823797225952, + "rewards/VideoAccuracy/std": 0.519324541091919, + "step": 904, + "train_speed(iter/s)": 0.081137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 740.0, + "completions/mean_length": 478.0476379394531, + "completions/min_length": 306.0, + "entropy/max": 1.109375, + "entropy/mean": 0.490234375, + "entropy/min": 0.11669921875, + "epoch": 0.905, + "grad_norm": 1.1137963129761075, + "kl": 0.2373046875, + "learning_rate": 4.5097758555926127e-08, + "loss": 0.0024147345684468746, + "memory(GiB)": 137.41, + "reward": 2.0289058685302734, + "reward_std": 0.09318455308675766, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.563530683517456, + "rewards/EvidenceHallucination/std": 0.43566638231277466, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 1.1854714155197144, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8161997199058533, + "rewards/VideoAccuracy/std": 0.42606857419013977, + "step": 905, + "train_speed(iter/s)": 0.080757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 742.0, + "completions/mean_length": 457.8809509277344, + "completions/min_length": 305.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.453125, + "entropy/min": 0.31640625, + "epoch": 0.906, + "grad_norm": 1.4296618106636225, + "kl": 0.29296875, + "learning_rate": 4.416034294373472e-08, + "loss": 0.002946004271507263, + "memory(GiB)": 137.41, + "reward": 1.829820156097412, + "reward_std": 0.3908424377441406, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.607211172580719, + "rewards/EvidenceHallucination/std": 0.4409499764442444, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 0.9358022809028625, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.6464732885360718, + "rewards/VideoAccuracy/std": 0.39280256628990173, + "step": 906, + "train_speed(iter/s)": 0.080474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 710.0, + "completions/mean_length": 483.76190185546875, + "completions/min_length": 349.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.4296875, + "entropy/min": 0.2578125, + "epoch": 0.907, + "grad_norm": 1.108937079876599, + "kl": 0.279296875, + "learning_rate": 4.323255262627845e-08, + "loss": 0.0028140272479504347, + "memory(GiB)": 137.41, + "reward": 1.8778079748153687, + "reward_std": 0.151490718126297, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.48161962628364563, + "rewards/EvidenceHallucination/std": 0.46850135922431946, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.134661316871643, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6814842224121094, + "rewards/VideoAccuracy/std": 0.6318930387496948, + "step": 907, + "train_speed(iter/s)": 0.080233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1080.0, + "completions/mean_length": 588.4761962890625, + "completions/min_length": 369.0, + "entropy/max": 1.0625, + "entropy/mean": 0.380859375, + "entropy/min": 0.1474609375, + "epoch": 0.908, + "grad_norm": 1.101879433193266, + "kl": 0.2001953125, + "learning_rate": 4.231439694639483e-08, + "loss": 0.002064464846625924, + "memory(GiB)": 137.41, + "reward": 2.2261457443237305, + "reward_std": 0.17315956950187683, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6907598376274109, + "rewards/EvidenceHallucination/std": 0.3077680766582489, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.7419134378433228, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9546603560447693, + "rewards/VideoAccuracy/std": 0.26798194646835327, + "step": 908, + "train_speed(iter/s)": 0.079857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 670.0, + "completions/mean_length": 482.0238037109375, + "completions/min_length": 348.0, + "entropy/max": 0.58203125, + "entropy/mean": 0.439453125, + "entropy/min": 0.2890625, + "epoch": 0.909, + "grad_norm": 0.9415331910374828, + "kl": 0.271484375, + "learning_rate": 4.140588514990162e-08, + "loss": 0.0027337963692843914, + "memory(GiB)": 137.41, + "reward": 1.7474002838134766, + "reward_std": 0.10227378457784653, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5650557279586792, + "rewards/EvidenceHallucination/std": 0.4620368182659149, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.192309856414795, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.5772462487220764, + "rewards/VideoAccuracy/std": 0.450177937746048, + "step": 909, + "train_speed(iter/s)": 0.079625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 588.0, + "completions/mean_length": 458.5476379394531, + "completions/min_length": 309.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.44140625, + "entropy/min": 0.31640625, + "epoch": 0.91, + "grad_norm": 1.4538286084749623, + "kl": 0.28515625, + "learning_rate": 4.050702638550274e-08, + "loss": 0.0028566864784806967, + "memory(GiB)": 137.41, + "reward": 1.526552677154541, + "reward_std": 0.2703312933444977, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3294943869113922, + "rewards/EvidenceHallucination/std": 0.4306771755218506, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7083376049995422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.4273204505443573, + "rewards/VideoAccuracy/std": 0.44682276248931885, + "step": 910, + "train_speed(iter/s)": 0.07938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 944.0, + "completions/mean_length": 599.5238037109375, + "completions/min_length": 338.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.287109375, + "entropy/min": 0.10693359375, + "epoch": 0.911, + "grad_norm": 0.9722409780067869, + "kl": 0.2060546875, + "learning_rate": 3.9617829704695625e-08, + "loss": 0.002105217892676592, + "memory(GiB)": 137.41, + "reward": 2.0139424800872803, + "reward_std": 0.14438004791736603, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5142771005630493, + "rewards/EvidenceHallucination/std": 0.44988352060317993, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 0.771516740322113, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 0.7158487439155579, + "rewards/VideoAccuracy/std": 0.4261232018470764, + "step": 911, + "train_speed(iter/s)": 0.079086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 654.0, + "completions/mean_length": 473.40478515625, + "completions/min_length": 231.0, + "entropy/max": 1.0625, + "entropy/mean": 0.5078125, + "entropy/min": 0.1923828125, + "epoch": 0.912, + "grad_norm": 1.0951489120309386, + "kl": 0.267578125, + "learning_rate": 3.87383040616811e-08, + "loss": 0.0027020308189094067, + "memory(GiB)": 137.41, + "reward": 1.6963773965835571, + "reward_std": 0.10742451995611191, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5140460133552551, + "rewards/EvidenceHallucination/std": 0.4591914415359497, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.0638718605041504, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011404514313, + "rewards/VideoAccuracy/mean": 0.5316633582115173, + "rewards/VideoAccuracy/std": 0.4423540234565735, + "step": 912, + "train_speed(iter/s)": 0.078811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 587.0, + "completions/mean_length": 423.5238037109375, + "completions/min_length": 301.0, + "entropy/max": 0.703125, + "entropy/mean": 0.4609375, + "entropy/min": 0.318359375, + "epoch": 0.913, + "grad_norm": 1.2959113487517442, + "kl": 0.283203125, + "learning_rate": 3.78684583132729e-08, + "loss": 0.0028616702184081078, + "memory(GiB)": 137.41, + "reward": 1.6041069030761719, + "reward_std": 0.16784727573394775, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4477301239967346, + "rewards/EvidenceHallucination/std": 0.4808220863342285, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.8035924434661865, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5145609974861145, + "rewards/VideoAccuracy/std": 0.4534718692302704, + "step": 913, + "train_speed(iter/s)": 0.078499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 880.0, + "completions/mean_length": 505.6190490722656, + "completions/min_length": 274.0, + "entropy/max": 0.5, + "entropy/mean": 0.361328125, + "entropy/min": 0.1708984375, + "epoch": 0.914, + "grad_norm": 1.292160084245574, + "kl": 0.2578125, + "learning_rate": 3.700830121880771e-08, + "loss": 0.002599894069135189, + "memory(GiB)": 137.41, + "reward": 2.0603015422821045, + "reward_std": 0.19325129687786102, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.47536569833755493, + "rewards/EvidenceHallucination/std": 0.46217137575149536, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.9606061577796936, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7985616326332092, + "rewards/VideoAccuracy/std": 0.5646498203277588, + "step": 914, + "train_speed(iter/s)": 0.078242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 831.0, + "completions/mean_length": 504.1428527832031, + "completions/min_length": 338.0, + "entropy/max": 2.0, + "entropy/mean": 0.43359375, + "entropy/min": 0.1767578125, + "epoch": 0.915, + "grad_norm": 0.8688230699508614, + "kl": 0.23828125, + "learning_rate": 3.615784144005796e-08, + "loss": 0.002415733877569437, + "memory(GiB)": 137.41, + "reward": 1.6662129163742065, + "reward_std": 0.17078039050102234, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3501947224140167, + "rewards/EvidenceHallucination/std": 0.45579585433006287, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.2137902975082397, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.4961739182472229, + "rewards/VideoAccuracy/std": 0.4929002821445465, + "step": 915, + "train_speed(iter/s)": 0.07793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 661.0, + "completions/mean_length": 462.7857360839844, + "completions/min_length": 358.0, + "entropy/max": 1.2109375, + "entropy/mean": 0.455078125, + "entropy/min": 0.25390625, + "epoch": 0.916, + "grad_norm": 1.0376100448719379, + "kl": 0.279296875, + "learning_rate": 3.531708754114437e-08, + "loss": 0.0028096698224544525, + "memory(GiB)": 137.41, + "reward": 1.4328473806381226, + "reward_std": 0.23865549266338348, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3442094027996063, + "rewards/EvidenceHallucination/std": 0.44941267371177673, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.6608339548110962, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.3306720554828644, + "rewards/VideoAccuracy/std": 0.4214882254600525, + "step": 916, + "train_speed(iter/s)": 0.077643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 578.0, + "completions/mean_length": 447.6428527832031, + "completions/min_length": 294.0, + "entropy/max": 0.52734375, + "entropy/mean": 0.41015625, + "entropy/min": 0.271484375, + "epoch": 0.917, + "grad_norm": 1.3274216254416777, + "kl": 0.30078125, + "learning_rate": 3.448604798844912e-08, + "loss": 0.003033221699297428, + "memory(GiB)": 137.41, + "reward": 2.051182985305786, + "reward_std": 0.19719287753105164, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6522086262702942, + "rewards/EvidenceHallucination/std": 0.4044216573238373, + "rewards/Evidence_Num_Record/mean": 3.809523820877075, + "rewards/Evidence_Num_Record/std": 0.7404050827026367, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.8255032896995544, + "rewards/VideoAccuracy/std": 0.5068250894546509, + "step": 917, + "train_speed(iter/s)": 0.077421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1117.0, + "completions/mean_length": 627.1428833007812, + "completions/min_length": 343.0, + "entropy/max": 1.0546875, + "entropy/mean": 0.36328125, + "entropy/min": 0.1513671875, + "epoch": 0.918, + "grad_norm": 1.1210837527492852, + "kl": 0.2041015625, + "learning_rate": 3.366473115053148e-08, + "loss": 0.0020890242885798216, + "memory(GiB)": 137.41, + "reward": 1.8940800428390503, + "reward_std": 0.18140888214111328, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43879175186157227, + "rewards/EvidenceHallucination/std": 0.4503001570701599, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 0.9323829412460327, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.6777501702308655, + "rewards/VideoAccuracy/std": 0.43920210003852844, + "step": 918, + "train_speed(iter/s)": 0.077142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 729.0, + "completions/mean_length": 472.3571472167969, + "completions/min_length": 247.0, + "entropy/max": 0.6875, + "entropy/mean": 0.44921875, + "entropy/min": 0.251953125, + "epoch": 0.919, + "grad_norm": 1.5327354185688424, + "kl": 0.271484375, + "learning_rate": 3.285314529804295e-08, + "loss": 0.0027872147038578987, + "memory(GiB)": 137.41, + "reward": 2.064453601837158, + "reward_std": 0.09906556457281113, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7417626976966858, + "rewards/EvidenceHallucination/std": 0.3818971514701843, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 1.353148341178894, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.825624942779541, + "rewards/VideoAccuracy/std": 0.22875171899795532, + "step": 919, + "train_speed(iter/s)": 0.076839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 618.0, + "completions/mean_length": 463.21429443359375, + "completions/min_length": 319.0, + "entropy/max": 0.69140625, + "entropy/mean": 0.458984375, + "entropy/min": 0.33984375, + "epoch": 0.92, + "grad_norm": 1.40761268214982, + "kl": 0.283203125, + "learning_rate": 3.205129860364375e-08, + "loss": 0.0028404868207871914, + "memory(GiB)": 137.41, + "reward": 1.4765101671218872, + "reward_std": 0.26599860191345215, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3487204313278198, + "rewards/EvidenceHallucination/std": 0.4532499313354492, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 0.7513054609298706, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.3781946003437042, + "rewards/VideoAccuracy/std": 0.37577101588249207, + "step": 920, + "train_speed(iter/s)": 0.076347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 835.0, + "completions/mean_length": 548.2619018554688, + "completions/min_length": 330.0, + "entropy/max": 0.490234375, + "entropy/mean": 0.28515625, + "entropy/min": 0.1337890625, + "epoch": 0.921, + "grad_norm": 1.039517250813372, + "kl": 0.2158203125, + "learning_rate": 3.125919914192143e-08, + "loss": 0.002195358509197831, + "memory(GiB)": 137.41, + "reward": 2.1239235401153564, + "reward_std": 0.23332199454307556, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.576139509677887, + "rewards/EvidenceHallucination/std": 0.44498687982559204, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.4915073812007904, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.8086954951286316, + "rewards/VideoAccuracy/std": 0.37584733963012695, + "step": 921, + "train_speed(iter/s)": 0.076084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 906.0, + "completions/mean_length": 505.8571472167969, + "completions/min_length": 328.0, + "entropy/max": 0.703125, + "entropy/mean": 0.4375, + "entropy/min": 0.251953125, + "epoch": 0.922, + "grad_norm": 1.4127220858797223, + "kl": 0.251953125, + "learning_rate": 3.0476854889308734e-08, + "loss": 0.0025830313097685575, + "memory(GiB)": 137.41, + "reward": 1.8330525159835815, + "reward_std": 0.3108817934989929, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5397878289222717, + "rewards/EvidenceHallucination/std": 0.44190317392349243, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.1001002788543701, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.6298568844795227, + "rewards/VideoAccuracy/std": 0.3939414322376251, + "step": 922, + "train_speed(iter/s)": 0.075874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 985.0, + "completions/mean_length": 515.357177734375, + "completions/min_length": 321.0, + "entropy/max": 2.046875, + "entropy/mean": 0.498046875, + "entropy/min": 0.259765625, + "epoch": 0.923, + "grad_norm": 0.9224311505711394, + "kl": 0.2421875, + "learning_rate": 2.9704273724003526e-08, + "loss": 0.002463304903358221, + "memory(GiB)": 137.41, + "reward": 1.363296627998352, + "reward_std": 0.09228571504354477, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.2945178747177124, + "rewards/EvidenceHallucination/std": 0.44748246669769287, + "rewards/Evidence_Num_Record/mean": 4.214285850524902, + "rewards/Evidence_Num_Record/std": 0.9761975407600403, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.30439308285713196, + "rewards/VideoAccuracy/std": 0.44341716170310974, + "step": 923, + "train_speed(iter/s)": 0.075482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 656.0, + "completions/mean_length": 489.16668701171875, + "completions/min_length": 361.0, + "entropy/max": 0.62890625, + "entropy/mean": 0.357421875, + "entropy/min": 0.138671875, + "epoch": 0.924, + "grad_norm": 1.0761774381857239, + "kl": 0.263671875, + "learning_rate": 2.8941463425889767e-08, + "loss": 0.0026684931945055723, + "memory(GiB)": 137.41, + "reward": 2.0897278785705566, + "reward_std": 0.07733561098575592, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.611544132232666, + "rewards/EvidenceHallucination/std": 0.44709569215774536, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8785083889961243, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.8007524609565735, + "rewards/VideoAccuracy/std": 0.5321916937828064, + "step": 924, + "train_speed(iter/s)": 0.07527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 790.0, + "completions/mean_length": 510.5, + "completions/min_length": 271.0, + "entropy/max": 0.8984375, + "entropy/mean": 0.39453125, + "entropy/min": 0.138671875, + "epoch": 0.925, + "grad_norm": 1.2208569006575813, + "kl": 0.2294921875, + "learning_rate": 2.8188431676458345e-08, + "loss": 0.0023357027675956488, + "memory(GiB)": 137.41, + "reward": 2.259730815887451, + "reward_std": 0.08099796622991562, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.864700436592102, + "rewards/EvidenceHallucination/std": 0.21934209764003754, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.5151193141937256, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9534574151039124, + "rewards/VideoAccuracy/std": 0.24664339423179626, + "step": 925, + "train_speed(iter/s)": 0.074957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 743.0, + "completions/mean_length": 487.5, + "completions/min_length": 359.0, + "entropy/max": 0.75, + "entropy/mean": 0.455078125, + "entropy/min": 0.24609375, + "epoch": 0.926, + "grad_norm": 1.0885052875433516, + "kl": 0.275390625, + "learning_rate": 2.7445186058730917e-08, + "loss": 0.0027898214757442474, + "memory(GiB)": 137.41, + "reward": 1.572590947151184, + "reward_std": 0.2691900134086609, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.35204336047172546, + "rewards/EvidenceHallucination/std": 0.4198104441165924, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 1.399352788925171, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.4688488841056824, + "rewards/VideoAccuracy/std": 0.451306015253067, + "step": 926, + "train_speed(iter/s)": 0.07473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 555.0, + "completions/mean_length": 428.23809814453125, + "completions/min_length": 257.0, + "entropy/max": 0.578125, + "entropy/mean": 0.453125, + "entropy/min": 0.25390625, + "epoch": 0.927, + "grad_norm": 1.4724280433107912, + "kl": 0.302734375, + "learning_rate": 2.6711734057182413e-08, + "loss": 0.0030424667056649923, + "memory(GiB)": 137.41, + "reward": 1.6641291379928589, + "reward_std": 0.2164650410413742, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.37653282284736633, + "rewards/EvidenceHallucination/std": 0.4643819332122803, + "rewards/Evidence_Num_Record/mean": 3.6190476417541504, + "rewards/Evidence_Num_Record/std": 0.8540400862693787, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.49358442425727844, + "rewards/VideoAccuracy/std": 0.5288416147232056, + "step": 927, + "train_speed(iter/s)": 0.074533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 858.0, + "completions/mean_length": 610.3095092773438, + "completions/min_length": 376.0, + "entropy/max": 0.578125, + "entropy/mean": 0.302734375, + "entropy/min": 0.09326171875, + "epoch": 0.928, + "grad_norm": 0.9018906561019351, + "kl": 0.203125, + "learning_rate": 2.5988083057666533e-08, + "loss": 0.002057016594335437, + "memory(GiB)": 137.41, + "reward": 2.055840253829956, + "reward_std": 0.09451095759868622, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.602540135383606, + "rewards/EvidenceHallucination/std": 0.38646990060806274, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.2549552917480469, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496559262275696, + "rewards/VideoAccuracy/mean": 0.8067607879638672, + "rewards/VideoAccuracy/std": 0.40263330936431885, + "step": 928, + "train_speed(iter/s)": 0.074268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 757.0, + "completions/mean_length": 434.21429443359375, + "completions/min_length": 318.0, + "entropy/max": 0.71484375, + "entropy/mean": 0.39453125, + "entropy/min": 0.228515625, + "epoch": 0.929, + "grad_norm": 1.0676244490347249, + "kl": 0.275390625, + "learning_rate": 2.5274240347340715e-08, + "loss": 0.0027612752746790648, + "memory(GiB)": 137.41, + "reward": 1.6721405982971191, + "reward_std": 0.13350006937980652, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5001934766769409, + "rewards/EvidenceHallucination/std": 0.46985816955566406, + "rewards/Evidence_Num_Record/mean": 3.738095283508301, + "rewards/Evidence_Num_Record/std": 0.8850939869880676, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.47686392068862915, + "rewards/VideoAccuracy/std": 0.3949807584285736, + "step": 929, + "train_speed(iter/s)": 0.073993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 777.0, + "completions/mean_length": 483.16668701171875, + "completions/min_length": 289.0, + "entropy/max": 0.8671875, + "entropy/mean": 0.4609375, + "entropy/min": 0.275390625, + "epoch": 0.93, + "grad_norm": 1.2716437008238204, + "kl": 0.26171875, + "learning_rate": 2.4570213114592953e-08, + "loss": 0.0026454541366547346, + "memory(GiB)": 137.41, + "reward": 1.5933008193969727, + "reward_std": 0.3612544536590576, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4227463901042938, + "rewards/EvidenceHallucination/std": 0.4742928743362427, + "rewards/Evidence_Num_Record/mean": 4.428571701049805, + "rewards/Evidence_Num_Record/std": 2.3073885440826416, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.48017996549606323, + "rewards/VideoAccuracy/std": 0.4840567708015442, + "step": 930, + "train_speed(iter/s)": 0.073792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1160.0, + "completions/mean_length": 608.857177734375, + "completions/min_length": 347.0, + "entropy/max": 0.55078125, + "entropy/mean": 0.28515625, + "entropy/min": 0.123046875, + "epoch": 0.931, + "grad_norm": 1.0547840891430953, + "kl": 0.2041015625, + "learning_rate": 2.3876008448969977e-08, + "loss": 0.0020937395747750998, + "memory(GiB)": 137.41, + "reward": 1.9991267919540405, + "reward_std": 0.1399175077676773, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.26360222697257996, + "rewards/EvidenceHallucination/std": 0.3506872057914734, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 0.7624309062957764, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9523809552192688, + "rewards/HonestTime/std": 0.21554027497768402, + "rewards/VideoAccuracy/mean": 0.7559301257133484, + "rewards/VideoAccuracy/std": 0.5217366814613342, + "step": 931, + "train_speed(iter/s)": 0.073486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/mean_length": 515.452392578125, + "completions/min_length": 286.0, + "entropy/max": 0.953125, + "entropy/mean": 0.44921875, + "entropy/min": 0.23046875, + "epoch": 0.932, + "grad_norm": 1.5907493693109938, + "kl": 0.267578125, + "learning_rate": 2.3191633341104855e-08, + "loss": 0.002785654505714774, + "memory(GiB)": 137.41, + "reward": 1.6841378211975098, + "reward_std": 0.1959161013364792, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3821144700050354, + "rewards/EvidenceHallucination/std": 0.435477614402771, + "rewards/Evidence_Num_Record/mean": 4.904761791229248, + "rewards/Evidence_Num_Record/std": 2.6486032009124756, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4285714328289032, + "rewards/HonestTime/std": 0.5008702874183655, + "rewards/VideoAccuracy/mean": 0.5220006108283997, + "rewards/VideoAccuracy/std": 0.3990272283554077, + "step": 932, + "train_speed(iter/s)": 0.073237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1972.0, + "completions/mean_length": 492.71429443359375, + "completions/min_length": 309.0, + "entropy/max": 0.80078125, + "entropy/mean": 0.43359375, + "entropy/min": 0.205078125, + "epoch": 0.933, + "grad_norm": 1.3993444707025986, + "kl": 0.255859375, + "learning_rate": 2.2517094682647396e-08, + "loss": 0.0026667274069041014, + "memory(GiB)": 137.41, + "reward": 1.4650702476501465, + "reward_std": 0.1610909253358841, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3479980528354645, + "rewards/EvidenceHallucination/std": 0.45839816331863403, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 3.8258397579193115, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.39547058939933777, + "rewards/VideoAccuracy/std": 0.4801982343196869, + "step": 933, + "train_speed(iter/s)": 0.072829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 688.0, + "completions/mean_length": 515.3095092773438, + "completions/min_length": 389.0, + "entropy/max": 0.5703125, + "entropy/mean": 0.369140625, + "entropy/min": 0.1494140625, + "epoch": 0.934, + "grad_norm": 1.0225077561198843, + "kl": 0.2421875, + "learning_rate": 2.185239926619431e-08, + "loss": 0.002430618042126298, + "memory(GiB)": 137.41, + "reward": 2.2757763862609863, + "reward_std": 0.09640401601791382, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6919313073158264, + "rewards/EvidenceHallucination/std": 0.42337656021118164, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.584348738193512, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9754852056503296, + "rewards/VideoAccuracy/std": 0.46474310755729675, + "step": 934, + "train_speed(iter/s)": 0.072638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1123.0, + "completions/mean_length": 543.047607421875, + "completions/min_length": 321.0, + "entropy/max": 1.21875, + "entropy/mean": 0.4375, + "entropy/min": 0.10302734375, + "epoch": 0.935, + "grad_norm": 1.019911305431324, + "kl": 0.21875, + "learning_rate": 2.119755378522137e-08, + "loss": 0.0022481405176222324, + "memory(GiB)": 137.41, + "reward": 1.7865290641784668, + "reward_std": 0.15708360075950623, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.43877112865448, + "rewards/EvidenceHallucination/std": 0.4510277807712555, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.0101114511489868, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5654414892196655, + "rewards/VideoAccuracy/std": 0.4084523320198059, + "step": 935, + "train_speed(iter/s)": 0.072413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 752.0, + "completions/mean_length": 463.6428527832031, + "completions/min_length": 250.0, + "entropy/max": 0.77734375, + "entropy/mean": 0.4765625, + "entropy/min": 0.25390625, + "epoch": 0.936, + "grad_norm": 0.9792749729206832, + "kl": 0.259765625, + "learning_rate": 2.0552564834014797e-08, + "loss": 0.0026516977231949568, + "memory(GiB)": 137.41, + "reward": 1.4407650232315063, + "reward_std": 0.23377752304077148, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3174581229686737, + "rewards/EvidenceHallucination/std": 0.45641613006591797, + "rewards/Evidence_Num_Record/mean": 3.952381134033203, + "rewards/Evidence_Num_Record/std": 1.3960288763046265, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.34870198369026184, + "rewards/VideoAccuracy/std": 0.45109590888023376, + "step": 936, + "train_speed(iter/s)": 0.072194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 548.0, + "completions/mean_length": 451.3809509277344, + "completions/min_length": 359.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.4453125, + "entropy/min": 0.314453125, + "epoch": 0.937, + "grad_norm": 1.4266157784650282, + "kl": 0.296875, + "learning_rate": 1.9917438907606553e-08, + "loss": 0.0029722112230956554, + "memory(GiB)": 137.41, + "reward": 2.01104474067688, + "reward_std": 0.28446537256240845, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6532959938049316, + "rewards/EvidenceHallucination/std": 0.4294026494026184, + "rewards/Evidence_Num_Record/mean": 3.8333334922790527, + "rewards/Evidence_Num_Record/std": 0.8811485767364502, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4523809552192688, + "rewards/HonestTime/std": 0.503760576248169, + "rewards/VideoAccuracy/mean": 0.7899093627929688, + "rewards/VideoAccuracy/std": 0.4812328815460205, + "step": 937, + "train_speed(iter/s)": 0.072003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 932.0, + "completions/mean_length": 586.5, + "completions/min_length": 343.0, + "entropy/max": 1.1328125, + "entropy/mean": 0.341796875, + "entropy/min": 0.1171875, + "epoch": 0.938, + "grad_norm": 0.826713359596132, + "kl": 0.2060546875, + "learning_rate": 1.9292182401707602e-08, + "loss": 0.0020839450880885124, + "memory(GiB)": 137.41, + "reward": 1.9464994668960571, + "reward_std": 0.05200798064470291, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5436687469482422, + "rewards/EvidenceHallucination/std": 0.4424469470977783, + "rewards/Evidence_Num_Record/mean": 4.238095283508301, + "rewards/Evidence_Num_Record/std": 1.1001002788543701, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.7044323682785034, + "rewards/VideoAccuracy/std": 0.43772637844085693, + "step": 938, + "train_speed(iter/s)": 0.071761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 877.0, + "completions/mean_length": 446.9761962890625, + "completions/min_length": 326.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.421875, + "entropy/min": 0.279296875, + "epoch": 0.939, + "grad_norm": 1.3265743791902576, + "kl": 0.275390625, + "learning_rate": 1.8676801612643954e-08, + "loss": 0.0028159632347524166, + "memory(GiB)": 137.41, + "reward": 1.7494856119155884, + "reward_std": 0.28992414474487305, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5868315100669861, + "rewards/EvidenceHallucination/std": 0.46872034668922424, + "rewards/Evidence_Num_Record/mean": 3.690476179122925, + "rewards/Evidence_Num_Record/std": 0.6434698104858398, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.5654523968696594, + "rewards/VideoAccuracy/std": 0.43522748351097107, + "step": 939, + "train_speed(iter/s)": 0.071502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 793.0, + "completions/mean_length": 488.452392578125, + "completions/min_length": 333.0, + "entropy/max": 0.765625, + "entropy/mean": 0.5, + "entropy/min": 0.2578125, + "epoch": 0.94, + "grad_norm": 1.2902749148649342, + "kl": 0.271484375, + "learning_rate": 1.807130273729329e-08, + "loss": 0.002739190822467208, + "memory(GiB)": 137.41, + "reward": 1.7307583093643188, + "reward_std": 0.2225092649459839, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45415210723876953, + "rewards/EvidenceHallucination/std": 0.46369168162345886, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.9093654155731201, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6065945625305176, + "rewards/VideoAccuracy/std": 0.568202555179596, + "step": 940, + "train_speed(iter/s)": 0.071291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 923.0, + "completions/mean_length": 569.90478515625, + "completions/min_length": 362.0, + "entropy/max": 0.546875, + "entropy/mean": 0.271484375, + "entropy/min": 0.1357421875, + "epoch": 0.941, + "grad_norm": 1.0144868951991712, + "kl": 0.21484375, + "learning_rate": 1.747569187302267e-08, + "loss": 0.0021691806614398956, + "memory(GiB)": 137.41, + "reward": 2.290971517562866, + "reward_std": 0.25219282507896423, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6649720668792725, + "rewards/EvidenceHallucination/std": 0.35364794731140137, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.6917466521263123, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9579771161079407, + "rewards/VideoAccuracy/std": 0.4417040944099426, + "step": 941, + "train_speed(iter/s)": 0.071053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 815.0, + "completions/mean_length": 460.3333435058594, + "completions/min_length": 305.0, + "entropy/max": 0.6875, + "entropy/mean": 0.443359375, + "entropy/min": 0.298828125, + "epoch": 0.942, + "grad_norm": 1.3556173582357736, + "kl": 0.2734375, + "learning_rate": 1.68899750176269e-08, + "loss": 0.0027679037302732468, + "memory(GiB)": 137.41, + "reward": 2.120345115661621, + "reward_std": 0.043410640209913254, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8824974298477173, + "rewards/EvidenceHallucination/std": 0.10965771973133087, + "rewards/Evidence_Num_Record/mean": 4.0, + "rewards/Evidence_Num_Record/std": 0.8834521770477295, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711870074272156, + "rewards/VideoAccuracy/mean": 0.8771790266036987, + "rewards/VideoAccuracy/std": 0.18448089063167572, + "step": 942, + "train_speed(iter/s)": 0.070841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 697.0, + "completions/mean_length": 440.8333435058594, + "completions/min_length": 304.0, + "entropy/max": 0.59765625, + "entropy/mean": 0.453125, + "entropy/min": 0.3046875, + "epoch": 0.943, + "grad_norm": 1.1990253561088162, + "kl": 0.275390625, + "learning_rate": 1.6314158069267946e-08, + "loss": 0.002778817666694522, + "memory(GiB)": 137.41, + "reward": 1.6294825077056885, + "reward_std": 0.18033751845359802, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4706454277038574, + "rewards/EvidenceHallucination/std": 0.4809424579143524, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 0.813646674156189, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5353533625602722, + "rewards/VideoAccuracy/std": 0.4947953522205353, + "step": 943, + "train_speed(iter/s)": 0.070629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 700.0, + "completions/mean_length": 484.3571472167969, + "completions/min_length": 286.0, + "entropy/max": 0.5625, + "entropy/mean": 0.376953125, + "entropy/min": 0.10595703125, + "epoch": 0.944, + "grad_norm": 1.212993572010397, + "kl": 0.263671875, + "learning_rate": 1.574824682641629e-08, + "loss": 0.0026726480573415756, + "memory(GiB)": 137.41, + "reward": 1.8845123052597046, + "reward_std": 0.20181572437286377, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3115893006324768, + "rewards/EvidenceHallucination/std": 0.4246801733970642, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.8913052082061768, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6555277705192566, + "rewards/VideoAccuracy/std": 0.45412132143974304, + "step": 944, + "train_speed(iter/s)": 0.070466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 829.0, + "completions/mean_length": 504.19049072265625, + "completions/min_length": 346.0, + "entropy/max": 0.72265625, + "entropy/mean": 0.408203125, + "entropy/min": 0.162109375, + "epoch": 0.945, + "grad_norm": 0.9727982469014594, + "kl": 0.251953125, + "learning_rate": 1.519224698779198e-08, + "loss": 0.002565240953117609, + "memory(GiB)": 137.41, + "reward": 1.833389163017273, + "reward_std": 0.11947314441204071, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4485011398792267, + "rewards/EvidenceHallucination/std": 0.44314906001091003, + "rewards/Evidence_Num_Record/mean": 4.0714287757873535, + "rewards/Evidence_Num_Record/std": 1.7021199464797974, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.6436887979507446, + "rewards/VideoAccuracy/std": 0.5075082182884216, + "step": 945, + "train_speed(iter/s)": 0.070227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1908.0, + "completions/mean_length": 541.3333129882812, + "completions/min_length": 327.0, + "entropy/max": 1.3203125, + "entropy/mean": 0.46875, + "entropy/min": 0.158203125, + "epoch": 0.946, + "grad_norm": 1.2465993695248823, + "kl": 0.2451171875, + "learning_rate": 1.4646164152307016e-08, + "loss": 0.0025582569651305676, + "memory(GiB)": 137.41, + "reward": 1.7707005739212036, + "reward_std": 0.14271783828735352, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4889131784439087, + "rewards/EvidenceHallucination/std": 0.4265022277832031, + "rewards/Evidence_Num_Record/mean": 5.214285850524902, + "rewards/Evidence_Num_Record/std": 4.099587917327881, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6395845413208008, + "rewards/VideoAccuracy/std": 0.4320310354232788, + "step": 946, + "train_speed(iter/s)": 0.069892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 470.452392578125, + "completions/min_length": 350.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.4296875, + "entropy/min": 0.283203125, + "epoch": 0.947, + "grad_norm": 1.1568858956657802, + "kl": 0.28125, + "learning_rate": 1.4110003819009509e-08, + "loss": 0.002838264685124159, + "memory(GiB)": 137.41, + "reward": 1.844517707824707, + "reward_std": 0.12314598262310028, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3775913119316101, + "rewards/EvidenceHallucination/std": 0.4459899663925171, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.8111447691917419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.6689993143081665, + "rewards/VideoAccuracy/std": 0.5376002788543701, + "step": 947, + "train_speed(iter/s)": 0.069711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1072.0, + "completions/mean_length": 537.857177734375, + "completions/min_length": 350.0, + "entropy/max": 2.015625, + "entropy/mean": 0.349609375, + "entropy/min": 0.126953125, + "epoch": 0.948, + "grad_norm": 1.0323083487996982, + "kl": 0.20703125, + "learning_rate": 1.3583771387028264e-08, + "loss": 0.0021275142207741737, + "memory(GiB)": 137.41, + "reward": 2.2996435165405273, + "reward_std": 0.07079820334911346, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7686232924461365, + "rewards/EvidenceHallucination/std": 0.3105987012386322, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.8913052678108215, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 1.0125852823257446, + "rewards/VideoAccuracy/std": 0.1856028288602829, + "step": 948, + "train_speed(iter/s)": 0.069326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 765.0, + "completions/mean_length": 486.4285888671875, + "completions/min_length": 339.0, + "entropy/max": 0.875, + "entropy/mean": 0.451171875, + "entropy/min": 0.234375, + "epoch": 0.949, + "grad_norm": 1.1457712081147309, + "kl": 0.2734375, + "learning_rate": 1.3067472155517734e-08, + "loss": 0.0027662317734211683, + "memory(GiB)": 137.41, + "reward": 1.6505743265151978, + "reward_std": 0.12623053789138794, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.44372376799583435, + "rewards/EvidenceHallucination/std": 0.4626716375350952, + "rewards/Evidence_Num_Record/mean": 4.142857074737549, + "rewards/Evidence_Num_Record/std": 1.049307107925415, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.5046866536140442, + "rewards/VideoAccuracy/std": 0.4409211575984955, + "step": 949, + "train_speed(iter/s)": 0.069144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 645.0, + "completions/mean_length": 448.0476379394531, + "completions/min_length": 307.0, + "entropy/max": 0.67578125, + "entropy/mean": 0.42578125, + "entropy/min": 0.203125, + "epoch": 0.95, + "grad_norm": 1.2601635925600487, + "kl": 0.26171875, + "learning_rate": 1.2561111323605711e-08, + "loss": 0.0026425044052302837, + "memory(GiB)": 137.41, + "reward": 1.7202696800231934, + "reward_std": 0.1176731213927269, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5164002776145935, + "rewards/EvidenceHallucination/std": 0.45663920044898987, + "rewards/Evidence_Num_Record/mean": 3.4285714626312256, + "rewards/Evidence_Num_Record/std": 0.9144598841667175, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5836562514305115, + "rewards/VideoAccuracy/std": 0.46830034255981445, + "step": 950, + "train_speed(iter/s)": 0.068991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 847.0, + "completions/mean_length": 575.5238037109375, + "completions/min_length": 370.0, + "entropy/max": 0.494140625, + "entropy/mean": 0.267578125, + "entropy/min": 0.142578125, + "epoch": 0.951, + "grad_norm": 0.9935481006326623, + "kl": 0.20703125, + "learning_rate": 1.2064693990339936e-08, + "loss": 0.002088331850245595, + "memory(GiB)": 137.41, + "reward": 2.3192498683929443, + "reward_std": 0.15067747235298157, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7569528222084045, + "rewards/EvidenceHallucination/std": 0.29730042815208435, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.9236220121383667, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.9678592085838318, + "rewards/VideoAccuracy/std": 0.25056150555610657, + "step": 951, + "train_speed(iter/s)": 0.068747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 934.0, + "completions/mean_length": 482.4285888671875, + "completions/min_length": 336.0, + "entropy/max": 1.8671875, + "entropy/mean": 0.515625, + "entropy/min": 0.1796875, + "epoch": 0.952, + "grad_norm": 1.2671158801258262, + "kl": 0.259765625, + "learning_rate": 1.1578225154637578e-08, + "loss": 0.002628859132528305, + "memory(GiB)": 137.41, + "reward": 1.8591318130493164, + "reward_std": 0.14313216507434845, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5897926688194275, + "rewards/EvidenceHallucination/std": 0.4144156873226166, + "rewards/Evidence_Num_Record/mean": 4.642857074737549, + "rewards/Evidence_Num_Record/std": 1.664865493774414, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.6840303540229797, + "rewards/VideoAccuracy/std": 0.4067501425743103, + "step": 952, + "train_speed(iter/s)": 0.06856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 542.0, + "completions/mean_length": 405.4285888671875, + "completions/min_length": 276.0, + "entropy/max": 0.66796875, + "entropy/mean": 0.439453125, + "entropy/min": 0.2890625, + "epoch": 0.953, + "grad_norm": 1.0800177401519737, + "kl": 0.294921875, + "learning_rate": 1.1101709715234386e-08, + "loss": 0.0029569934122264385, + "memory(GiB)": 137.41, + "reward": 1.3686178922653198, + "reward_std": 0.1393006443977356, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29636797308921814, + "rewards/EvidenceHallucination/std": 0.433406800031662, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.8611501455307007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.30934420228004456, + "rewards/VideoAccuracy/std": 0.4380314350128174, + "step": 953, + "train_speed(iter/s)": 0.068385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1036.0, + "completions/mean_length": 550.452392578125, + "completions/min_length": 320.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.3671875, + "entropy/min": 0.1640625, + "epoch": 0.954, + "grad_norm": 1.061834776497953, + "kl": 0.2451171875, + "learning_rate": 1.0635152470635511e-08, + "loss": 0.002483302028849721, + "memory(GiB)": 137.41, + "reward": 2.0882914066314697, + "reward_std": 0.13326478004455566, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6413856148719788, + "rewards/EvidenceHallucination/std": 0.43948817253112793, + "rewards/Evidence_Num_Record/mean": 4.047619342803955, + "rewards/Evidence_Num_Record/std": 0.7948732376098633, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7933475971221924, + "rewards/VideoAccuracy/std": 0.45950794219970703, + "step": 954, + "train_speed(iter/s)": 0.068159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 854.0, + "completions/mean_length": 531.952392578125, + "completions/min_length": 345.0, + "entropy/max": 1.0078125, + "entropy/mean": 0.388671875, + "entropy/min": 0.150390625, + "epoch": 0.955, + "grad_norm": 1.2273595567320048, + "kl": 0.2490234375, + "learning_rate": 1.0178558119067315e-08, + "loss": 0.0025239530950784683, + "memory(GiB)": 137.41, + "reward": 2.1271867752075195, + "reward_std": 0.10089495033025742, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7541455030441284, + "rewards/EvidenceHallucination/std": 0.25523731112480164, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.7662469148635864, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.506060779094696, + "rewards/VideoAccuracy/mean": 0.8763576149940491, + "rewards/VideoAccuracy/std": 0.22635163366794586, + "step": 955, + "train_speed(iter/s)": 0.067904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 710.0, + "completions/mean_length": 479.452392578125, + "completions/min_length": 313.0, + "entropy/max": 0.68359375, + "entropy/mean": 0.46484375, + "entropy/min": 0.294921875, + "epoch": 0.956, + "grad_norm": 1.1081920964545056, + "kl": 0.263671875, + "learning_rate": 9.731931258429638e-09, + "loss": 0.002650549402460456, + "memory(GiB)": 137.41, + "reward": 1.5818074941635132, + "reward_std": 0.20420019328594208, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.42155587673187256, + "rewards/EvidenceHallucination/std": 0.47306376695632935, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.21091628074646, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.46416300535202026, + "rewards/VideoAccuracy/std": 0.472253680229187, + "step": 956, + "train_speed(iter/s)": 0.067742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 698.0, + "completions/mean_length": 451.7857360839844, + "completions/min_length": 307.0, + "entropy/max": 0.5234375, + "entropy/mean": 0.408203125, + "entropy/min": 0.275390625, + "epoch": 0.957, + "grad_norm": 1.3877350890068532, + "kl": 0.283203125, + "learning_rate": 9.295276386250273e-09, + "loss": 0.0028288476169109344, + "memory(GiB)": 137.41, + "reward": 2.050288200378418, + "reward_std": 0.24018505215644836, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6676385402679443, + "rewards/EvidenceHallucination/std": 0.4354212284088135, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 1.007521152496338, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8167604804039001, + "rewards/VideoAccuracy/std": 0.520463228225708, + "step": 957, + "train_speed(iter/s)": 0.067538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 867.0, + "completions/mean_length": 519.6904907226562, + "completions/min_length": 334.0, + "entropy/max": 0.93359375, + "entropy/mean": 0.408203125, + "entropy/min": 0.138671875, + "epoch": 0.958, + "grad_norm": 0.9809102349146418, + "kl": 0.2216796875, + "learning_rate": 8.868597899638897e-09, + "loss": 0.0022694962099194527, + "memory(GiB)": 137.41, + "reward": 2.078068733215332, + "reward_std": 0.08545194566249847, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5540817975997925, + "rewards/EvidenceHallucination/std": 0.44805270433425903, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.0704021453857422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.8434427380561829, + "rewards/VideoAccuracy/std": 0.49243026971817017, + "step": 958, + "train_speed(iter/s)": 0.067335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 657.0, + "completions/mean_length": 469.21429443359375, + "completions/min_length": 283.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.4296875, + "entropy/min": 0.25, + "epoch": 0.959, + "grad_norm": 1.1630105588447377, + "kl": 0.27734375, + "learning_rate": 8.45190009524288e-09, + "loss": 0.0028287163004279137, + "memory(GiB)": 137.41, + "reward": 1.6504467725753784, + "reward_std": 0.2651764154434204, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.47433099150657654, + "rewards/EvidenceHallucination/std": 0.46519747376441956, + "rewards/Evidence_Num_Record/mean": 4.11904764175415, + "rewards/Evidence_Num_Record/std": 1.2337208986282349, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.2857142984867096, + "rewards/HonestTime/std": 0.45722994208335876, + "rewards/VideoAccuracy/mean": 0.49843770265579224, + "rewards/VideoAccuracy/std": 0.4565110504627228, + "step": 959, + "train_speed(iter/s)": 0.067155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 561.0, + "completions/mean_length": 423.3333435058594, + "completions/min_length": 269.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.439453125, + "entropy/min": 0.234375, + "epoch": 0.96, + "grad_norm": 1.2660896652549583, + "kl": 0.287109375, + "learning_rate": 8.045187169204658e-09, + "loss": 0.0028954786248505116, + "memory(GiB)": 137.41, + "reward": 1.407809853553772, + "reward_std": 0.25971004366874695, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.22925804555416107, + "rewards/EvidenceHallucination/std": 0.39457935094833374, + "rewards/Evidence_Num_Record/mean": 3.595238208770752, + "rewards/Evidence_Num_Record/std": 1.0135550498962402, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.32862478494644165, + "rewards/VideoAccuracy/std": 0.3720688819885254, + "step": 960, + "train_speed(iter/s)": 0.067007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 947.0, + "completions/mean_length": 550.2142944335938, + "completions/min_length": 360.0, + "entropy/max": 0.486328125, + "entropy/mean": 0.298828125, + "entropy/min": 0.1279296875, + "epoch": 0.961, + "grad_norm": 0.9837898086934238, + "kl": 0.22265625, + "learning_rate": 7.648463217118983e-09, + "loss": 0.0022573107853531837, + "memory(GiB)": 137.41, + "reward": 2.3989174365997314, + "reward_std": 0.07762474566698074, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7259609699249268, + "rewards/EvidenceHallucination/std": 0.36685147881507874, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.7904776334762573, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.9761905074119568, + "rewards/HonestTime/std": 0.15430334210395813, + "rewards/VideoAccuracy/mean": 1.0584871768951416, + "rewards/VideoAccuracy/std": 0.36890536546707153, + "step": 961, + "train_speed(iter/s)": 0.066794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 695.0, + "completions/mean_length": 476.6428527832031, + "completions/min_length": 293.0, + "entropy/max": 1.671875, + "entropy/mean": 0.54296875, + "entropy/min": 0.28515625, + "epoch": 0.962, + "grad_norm": 1.368957187885661, + "kl": 0.279296875, + "learning_rate": 7.261732233991513e-09, + "loss": 0.00282856822013855, + "memory(GiB)": 137.41, + "reward": 1.9559398889541626, + "reward_std": 0.14292877912521362, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6334197521209717, + "rewards/EvidenceHallucination/std": 0.39311859011650085, + "rewards/Evidence_Num_Record/mean": 4.452381134033203, + "rewards/Evidence_Num_Record/std": 1.4517000913619995, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3095238208770752, + "rewards/HonestTime/std": 0.4679011106491089, + "rewards/VideoAccuracy/mean": 0.7673510909080505, + "rewards/VideoAccuracy/std": 0.35943979024887085, + "step": 962, + "train_speed(iter/s)": 0.066592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 687.0, + "completions/mean_length": 397.1428527832031, + "completions/min_length": 269.0, + "entropy/max": 0.5390625, + "entropy/mean": 0.419921875, + "entropy/min": 0.28515625, + "epoch": 0.963, + "grad_norm": 1.4789215880791144, + "kl": 0.2734375, + "learning_rate": 6.884998114198959e-09, + "loss": 0.0027652564458549023, + "memory(GiB)": 137.41, + "reward": 1.682898759841919, + "reward_std": 0.264392614364624, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5018981695175171, + "rewards/EvidenceHallucination/std": 0.4526165723800659, + "rewards/Evidence_Num_Record/mean": 3.142857313156128, + "rewards/Evidence_Num_Record/std": 0.8715399503707886, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.5825192928314209, + "rewards/VideoAccuracy/std": 0.4478265643119812, + "step": 963, + "train_speed(iter/s)": 0.066416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 882.0, + "completions/mean_length": 517.3095092773438, + "completions/min_length": 284.0, + "entropy/max": 0.61328125, + "entropy/mean": 0.3515625, + "entropy/min": 0.1220703125, + "epoch": 0.964, + "grad_norm": 1.2469047557569142, + "kl": 0.2431640625, + "learning_rate": 6.518264651449779e-09, + "loss": 0.0024622909259051085, + "memory(GiB)": 137.41, + "reward": 2.220378875732422, + "reward_std": 0.16395387053489685, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7585100531578064, + "rewards/EvidenceHallucination/std": 0.3773439824581146, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 0.9258201122283936, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.9067720770835876, + "rewards/VideoAccuracy/std": 0.38802117109298706, + "step": 964, + "train_speed(iter/s)": 0.066269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1006.0, + "completions/mean_length": 560.8333740234375, + "completions/min_length": 321.0, + "entropy/max": 1.125, + "entropy/mean": 0.427734375, + "entropy/min": 0.126953125, + "epoch": 0.965, + "grad_norm": 1.1405925129994456, + "kl": 0.2294921875, + "learning_rate": 6.161535538745877e-09, + "loss": 0.0023431219160556793, + "memory(GiB)": 137.41, + "reward": 2.2555789947509766, + "reward_std": 0.0632232129573822, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8684941530227661, + "rewards/EvidenceHallucination/std": 0.11418500542640686, + "rewards/Evidence_Num_Record/mean": 4.357142925262451, + "rewards/Evidence_Num_Record/std": 1.4283100366592407, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 1.0152136087417603, + "rewards/VideoAccuracy/std": 0.17440201342105865, + "step": 965, + "train_speed(iter/s)": 0.066059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 776.0, + "completions/mean_length": 462.3571472167969, + "completions/min_length": 327.0, + "entropy/max": 1.28125, + "entropy/mean": 0.50390625, + "entropy/min": 0.2890625, + "epoch": 0.966, + "grad_norm": 1.234171819098209, + "kl": 0.27734375, + "learning_rate": 5.814814368345411e-09, + "loss": 0.002802126109600067, + "memory(GiB)": 137.41, + "reward": 1.6601600646972656, + "reward_std": 0.23421823978424072, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45707494020462036, + "rewards/EvidenceHallucination/std": 0.4481717646121979, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.1536942720413208, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5401737689971924, + "rewards/VideoAccuracy/std": 0.46416065096855164, + "step": 966, + "train_speed(iter/s)": 0.065872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 707.0, + "completions/mean_length": 474.5238037109375, + "completions/min_length": 354.0, + "entropy/max": 0.5625, + "entropy/mean": 0.41015625, + "entropy/min": 0.259765625, + "epoch": 0.967, + "grad_norm": 0.9856314384361605, + "kl": 0.2890625, + "learning_rate": 5.47810463172671e-09, + "loss": 0.00290171941742301, + "memory(GiB)": 137.41, + "reward": 1.616075038909912, + "reward_std": 0.13448190689086914, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.292513370513916, + "rewards/EvidenceHallucination/std": 0.4066556990146637, + "rewards/Evidence_Num_Record/mean": 4.023809432983398, + "rewards/Evidence_Num_Record/std": 0.6803189516067505, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.4575723111629486, + "rewards/VideoAccuracy/std": 0.5822301506996155, + "step": 967, + "train_speed(iter/s)": 0.065796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 862.0, + "completions/mean_length": 536.8095092773438, + "completions/min_length": 336.0, + "entropy/max": 0.78125, + "entropy/mean": 0.326171875, + "entropy/min": 0.119140625, + "epoch": 0.968, + "grad_norm": 1.0150307188913046, + "kl": 0.21875, + "learning_rate": 5.151409719553079e-09, + "loss": 0.0022225372958928347, + "memory(GiB)": 137.41, + "reward": 1.7808631658554077, + "reward_std": 0.0931321382522583, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.39275211095809937, + "rewards/EvidenceHallucination/std": 0.43423381447792053, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.9830148816108704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6428571343421936, + "rewards/HonestTime/std": 0.48496562242507935, + "rewards/VideoAccuracy/mean": 0.5737413167953491, + "rewards/VideoAccuracy/std": 0.42062005400657654, + "step": 968, + "train_speed(iter/s)": 0.06562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 711.0, + "completions/mean_length": 480.40478515625, + "completions/min_length": 261.0, + "entropy/max": 0.9765625, + "entropy/mean": 0.416015625, + "entropy/min": 0.1611328125, + "epoch": 0.969, + "grad_norm": 1.1888175493601036, + "kl": 0.275390625, + "learning_rate": 4.834732921638718e-09, + "loss": 0.0027951847296208143, + "memory(GiB)": 137.41, + "reward": 1.6992765665054321, + "reward_std": 0.12343087792396545, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5417040586471558, + "rewards/EvidenceHallucination/std": 0.46338191628456116, + "rewards/Evidence_Num_Record/mean": 4.285714149475098, + "rewards/Evidence_Num_Record/std": 1.1952285766601562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.5623642802238464, + "rewards/VideoAccuracy/std": 0.46270543336868286, + "step": 969, + "train_speed(iter/s)": 0.065423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 846.0, + "completions/mean_length": 458.5, + "completions/min_length": 319.0, + "entropy/max": 0.8125, + "entropy/mean": 0.470703125, + "entropy/min": 0.23046875, + "epoch": 0.97, + "grad_norm": 1.4858103128918467, + "kl": 0.279296875, + "learning_rate": 4.528077426915411e-09, + "loss": 0.0028038870077580214, + "memory(GiB)": 137.41, + "reward": 1.5698540210723877, + "reward_std": 0.3884607255458832, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3760813772678375, + "rewards/EvidenceHallucination/std": 0.4657697379589081, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.157964825630188, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.4660661518573761, + "rewards/VideoAccuracy/std": 0.5560010671615601, + "step": 970, + "train_speed(iter/s)": 0.06524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1025.0, + "completions/mean_length": 563.1428833007812, + "completions/min_length": 324.0, + "entropy/max": 0.484375, + "entropy/mean": 0.255859375, + "entropy/min": 0.12255859375, + "epoch": 0.971, + "grad_norm": 1.0478267118673452, + "kl": 0.22265625, + "learning_rate": 4.231446323400556e-09, + "loss": 0.002258533611893654, + "memory(GiB)": 137.41, + "reward": 2.412954807281494, + "reward_std": 0.12907324731349945, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.66871577501297, + "rewards/EvidenceHallucination/std": 0.30906942486763, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8502919673919678, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.079211711883545, + "rewards/VideoAccuracy/std": 0.3644346594810486, + "step": 971, + "train_speed(iter/s)": 0.065052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 670.0, + "completions/mean_length": 481.23809814453125, + "completions/min_length": 336.0, + "entropy/max": 1.015625, + "entropy/mean": 0.484375, + "entropy/min": 0.283203125, + "epoch": 0.972, + "grad_norm": 1.1357201236936143, + "kl": 0.271484375, + "learning_rate": 3.944842598166187e-09, + "loss": 0.002750544808804989, + "memory(GiB)": 137.41, + "reward": 1.3433635234832764, + "reward_std": 0.11884146928787231, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.21864919364452362, + "rewards/EvidenceHallucination/std": 0.37373003363609314, + "rewards/Evidence_Num_Record/mean": 4.5714287757873535, + "rewards/Evidence_Num_Record/std": 1.3460485935211182, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.2710622549057007, + "rewards/VideoAccuracy/std": 0.4434342086315155, + "step": 972, + "train_speed(iter/s)": 0.064901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1152.0, + "completions/mean_length": 445.2857360839844, + "completions/min_length": 198.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.443359375, + "entropy/min": 0.2236328125, + "epoch": 0.973, + "grad_norm": 1.1542425201520503, + "kl": 0.265625, + "learning_rate": 3.6682691373086662e-09, + "loss": 0.0026901671662926674, + "memory(GiB)": 137.41, + "reward": 1.4638851881027222, + "reward_std": 0.21122592687606812, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3182029128074646, + "rewards/EvidenceHallucination/std": 0.4255477786064148, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 1.3110802173614502, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4002445936203003, + "rewards/VideoAccuracy/std": 0.4778297245502472, + "step": 973, + "train_speed(iter/s)": 0.064688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 694.0, + "completions/mean_length": 448.90478515625, + "completions/min_length": 329.0, + "entropy/max": 0.5625, + "entropy/mean": 0.34375, + "entropy/min": 0.16015625, + "epoch": 0.974, + "grad_norm": 1.2473136745873308, + "kl": 0.255859375, + "learning_rate": 3.4017287259193728e-09, + "loss": 0.002587102120742202, + "memory(GiB)": 137.41, + "reward": 2.5488216876983643, + "reward_std": 0.11489617824554443, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7000425457954407, + "rewards/EvidenceHallucination/std": 0.42865434288978577, + "rewards/Evidence_Num_Record/mean": 3.357142925262451, + "rewards/Evidence_Num_Record/std": 0.5328903794288635, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 1.2469083070755005, + "rewards/VideoAccuracy/std": 0.5960439443588257, + "step": 974, + "train_speed(iter/s)": 0.064561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 766.0, + "completions/mean_length": 486.26190185546875, + "completions/min_length": 288.0, + "entropy/max": 1.4140625, + "entropy/mean": 0.443359375, + "entropy/min": 0.1298828125, + "epoch": 0.975, + "grad_norm": 1.3893919596098658, + "kl": 0.2392578125, + "learning_rate": 3.1452240480577262e-09, + "loss": 0.002424593549221754, + "memory(GiB)": 137.41, + "reward": 2.1735761165618896, + "reward_std": 0.06262548267841339, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7403359413146973, + "rewards/EvidenceHallucination/std": 0.3639848828315735, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 1.092950701713562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9588421583175659, + "rewards/VideoAccuracy/std": 0.11992564797401428, + "step": 975, + "train_speed(iter/s)": 0.064283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 604.0, + "completions/mean_length": 461.66668701171875, + "completions/min_length": 280.0, + "entropy/max": 0.88671875, + "entropy/mean": 0.44140625, + "entropy/min": 0.271484375, + "epoch": 0.976, + "grad_norm": 1.0344104124820364, + "kl": 0.28125, + "learning_rate": 2.8987576867225415e-09, + "loss": 0.002832625526934862, + "memory(GiB)": 137.41, + "reward": 1.4347890615463257, + "reward_std": 0.2360856980085373, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.33469676971435547, + "rewards/EvidenceHallucination/std": 0.4358334541320801, + "rewards/Evidence_Num_Record/mean": 3.904762029647827, + "rewards/Evidence_Num_Record/std": 0.9830148816108704, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.3440402150154114, + "rewards/VideoAccuracy/std": 0.43539971113204956, + "step": 976, + "train_speed(iter/s)": 0.064168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 588.0, + "completions/mean_length": 439.9761962890625, + "completions/min_length": 274.0, + "entropy/max": 0.59375, + "entropy/mean": 0.42578125, + "entropy/min": 0.224609375, + "epoch": 0.977, + "grad_norm": 1.479544814511891, + "kl": 0.2890625, + "learning_rate": 2.662332123827715e-09, + "loss": 0.002926081418991089, + "memory(GiB)": 137.41, + "reward": 2.1106927394866943, + "reward_std": 0.24937795102596283, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6478333473205566, + "rewards/EvidenceHallucination/std": 0.38507360219955444, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8502919673919678, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.8811261653900146, + "rewards/VideoAccuracy/std": 0.5433034300804138, + "step": 977, + "train_speed(iter/s)": 0.064028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1154.0, + "completions/mean_length": 550.6904907226562, + "completions/min_length": 327.0, + "entropy/max": 1.53125, + "entropy/mean": 0.3828125, + "entropy/min": 0.1259765625, + "epoch": 0.978, + "grad_norm": 0.8558925233546997, + "kl": 0.2138671875, + "learning_rate": 2.435949740175802e-09, + "loss": 0.002151726046577096, + "memory(GiB)": 137.41, + "reward": 1.85798978805542, + "reward_std": 0.1420179307460785, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.36319100856781006, + "rewards/EvidenceHallucination/std": 0.4514017403125763, + "rewards/Evidence_Num_Record/mean": 3.857142925262451, + "rewards/Evidence_Num_Record/std": 1.0257996320724487, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.652018129825592, + "rewards/VideoAccuracy/std": 0.4988376200199127, + "step": 978, + "train_speed(iter/s)": 0.06383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 543.0, + "completions/mean_length": 423.5238037109375, + "completions/min_length": 347.0, + "entropy/max": 1.1171875, + "entropy/mean": 0.4765625, + "entropy/min": 0.353515625, + "epoch": 0.979, + "grad_norm": 1.3022093115598243, + "kl": 0.287109375, + "learning_rate": 2.2196128154349235e-09, + "loss": 0.0028963349759578705, + "memory(GiB)": 137.41, + "reward": 1.6460776329040527, + "reward_std": 0.1852813959121704, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5184386372566223, + "rewards/EvidenceHallucination/std": 0.46547335386276245, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.7261499166488647, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.509056568145752, + "rewards/VideoAccuracy/std": 0.46530571579933167, + "step": 979, + "train_speed(iter/s)": 0.063673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1132.0, + "completions/mean_length": 504.16668701171875, + "completions/min_length": 260.0, + "entropy/max": 0.66015625, + "entropy/mean": 0.388671875, + "entropy/min": 0.232421875, + "epoch": 0.98, + "grad_norm": 1.3549656682464233, + "kl": 0.267578125, + "learning_rate": 2.0133235281156735e-09, + "loss": 0.0026948326267302036, + "memory(GiB)": 137.41, + "reward": 1.8087973594665527, + "reward_std": 0.4004395008087158, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5858333110809326, + "rewards/EvidenceHallucination/std": 0.44859325885772705, + "rewards/Evidence_Num_Record/mean": 4.595238208770752, + "rewards/Evidence_Num_Record/std": 2.0489084720611572, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.6582973599433899, + "rewards/VideoAccuracy/std": 0.4996359050273895, + "step": 980, + "train_speed(iter/s)": 0.063485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 917.0, + "completions/mean_length": 550.7619018554688, + "completions/min_length": 344.0, + "entropy/max": 0.6484375, + "entropy/mean": 0.2890625, + "entropy/min": 0.1328125, + "epoch": 0.981, + "grad_norm": 1.019291494838437, + "kl": 0.2216796875, + "learning_rate": 1.8170839555486927e-09, + "loss": 0.0022596633061766624, + "memory(GiB)": 137.41, + "reward": 2.449709892272949, + "reward_std": 0.13686299324035645, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8575530648231506, + "rewards/EvidenceHallucination/std": 0.17645077407360077, + "rewards/Evidence_Num_Record/mean": 3.7857143878936768, + "rewards/Evidence_Num_Record/std": 0.5646373629570007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.0781992673873901, + "rewards/VideoAccuracy/std": 0.2899397015571594, + "step": 981, + "train_speed(iter/s)": 0.063216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1224.0, + "completions/mean_length": 483.5476379394531, + "completions/min_length": 357.0, + "entropy/max": 3.0625, + "entropy/mean": 0.58984375, + "entropy/min": 0.267578125, + "epoch": 0.982, + "grad_norm": 1.3356176737464052, + "kl": 0.279296875, + "learning_rate": 1.6308960738643518e-09, + "loss": 0.0028589987196028233, + "memory(GiB)": 137.41, + "reward": 1.6344343423843384, + "reward_std": 0.2198060005903244, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.45886075496673584, + "rewards/EvidenceHallucination/std": 0.4734567403793335, + "rewards/Evidence_Num_Record/mean": 4.190476417541504, + "rewards/Evidence_Num_Record/std": 2.1779167652130127, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5093289613723755, + "rewards/VideoAccuracy/std": 0.47165292501449585, + "step": 982, + "train_speed(iter/s)": 0.063085 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 857.0, + "completions/mean_length": 492.5238037109375, + "completions/min_length": 323.0, + "entropy/max": 0.828125, + "entropy/mean": 0.46484375, + "entropy/min": 0.30078125, + "epoch": 0.983, + "grad_norm": 1.243158746708228, + "kl": 0.259765625, + "learning_rate": 1.4547617579725446e-09, + "loss": 0.0026392736472189426, + "memory(GiB)": 137.41, + "reward": 1.5035511255264282, + "reward_std": 0.30378079414367676, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3797284960746765, + "rewards/EvidenceHallucination/std": 0.46767836809158325, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.6088099479675293, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.42760542035102844, + "rewards/VideoAccuracy/std": 0.4682418406009674, + "step": 983, + "train_speed(iter/s)": 0.062895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 766.0, + "completions/mean_length": 468.71429443359375, + "completions/min_length": 350.0, + "entropy/max": 0.5546875, + "entropy/mean": 0.35546875, + "entropy/min": 0.158203125, + "epoch": 0.984, + "grad_norm": 1.3097752195029715, + "kl": 0.263671875, + "learning_rate": 1.2886827815440372e-09, + "loss": 0.002676494186744094, + "memory(GiB)": 137.41, + "reward": 2.417630672454834, + "reward_std": 0.18030908703804016, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.756335973739624, + "rewards/EvidenceHallucination/std": 0.3809882700443268, + "rewards/Evidence_Num_Record/mean": 3.4761905670166016, + "rewards/Evidence_Num_Record/std": 0.6339229941368103, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8333333730697632, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 1.0996966361999512, + "rewards/VideoAccuracy/std": 0.4313112795352936, + "step": 984, + "train_speed(iter/s)": 0.062762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 749.0, + "completions/mean_length": 499.452392578125, + "completions/min_length": 310.0, + "entropy/max": 1.6484375, + "entropy/mean": 0.4765625, + "entropy/min": 0.134765625, + "epoch": 0.985, + "grad_norm": 1.2582956870984567, + "kl": 0.22265625, + "learning_rate": 1.1326608169920371e-09, + "loss": 0.002244186121970415, + "memory(GiB)": 137.41, + "reward": 2.0736777782440186, + "reward_std": 0.18576598167419434, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.649118959903717, + "rewards/EvidenceHallucination/std": 0.3512740433216095, + "rewards/Evidence_Num_Record/mean": 4.38095235824585, + "rewards/Evidence_Num_Record/std": 1.8206455707550049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.877187192440033, + "rewards/VideoAccuracy/std": 0.3439772427082062, + "step": 985, + "train_speed(iter/s)": 0.062612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 602.0, + "completions/mean_length": 458.6190490722656, + "completions/min_length": 299.0, + "entropy/max": 0.76953125, + "entropy/mean": 0.4765625, + "entropy/min": 0.306640625, + "epoch": 0.986, + "grad_norm": 1.4153108898347777, + "kl": 0.26171875, + "learning_rate": 9.866974354560964e-10, + "loss": 0.0026454180479049683, + "memory(GiB)": 137.41, + "reward": 1.6579073667526245, + "reward_std": 0.4413139820098877, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.4636135995388031, + "rewards/EvidenceHallucination/std": 0.4519502520561218, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9997095465660095, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1190476194024086, + "rewards/HonestTime/std": 0.32777008414268494, + "rewards/VideoAccuracy/mean": 0.5413752794265747, + "rewards/VideoAccuracy/std": 0.4650043547153473, + "step": 986, + "train_speed(iter/s)": 0.062456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 581.0, + "completions/mean_length": 461.9285888671875, + "completions/min_length": 313.0, + "entropy/max": 0.56640625, + "entropy/mean": 0.4140625, + "entropy/min": 0.28515625, + "epoch": 0.987, + "grad_norm": 1.4294908511870958, + "kl": 0.291015625, + "learning_rate": 8.507941067859015e-10, + "loss": 0.0029428645502775908, + "memory(GiB)": 137.41, + "reward": 1.8523160219192505, + "reward_std": 0.13490135967731476, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5050612688064575, + "rewards/EvidenceHallucination/std": 0.4884006679058075, + "rewards/Evidence_Num_Record/mean": 3.761904716491699, + "rewards/Evidence_Num_Record/std": 0.6917465925216675, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.5, + "rewards/HonestTime/std": 0.5060608386993408, + "rewards/VideoAccuracy/mean": 0.651303768157959, + "rewards/VideoAccuracy/std": 0.5998431444168091, + "step": 987, + "train_speed(iter/s)": 0.062331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 789.0, + "completions/mean_length": 547.4285888671875, + "completions/min_length": 367.0, + "entropy/max": 0.91796875, + "entropy/mean": 0.3203125, + "entropy/min": 0.15625, + "epoch": 0.988, + "grad_norm": 0.8890710600096047, + "kl": 0.2158203125, + "learning_rate": 7.249521995263963e-10, + "loss": 0.0021866951137781143, + "memory(GiB)": 137.41, + "reward": 1.7507095336914062, + "reward_std": 0.07063025236129761, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3379446864128113, + "rewards/EvidenceHallucination/std": 0.42451003193855286, + "rewards/Evidence_Num_Record/mean": 3.642857313156128, + "rewards/Evidence_Num_Record/std": 0.8211066126823425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6666666865348816, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.5497872829437256, + "rewards/VideoAccuracy/std": 0.39387640357017517, + "step": 988, + "train_speed(iter/s)": 0.062227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 756.0, + "completions/mean_length": 477.0476379394531, + "completions/min_length": 338.0, + "entropy/max": 0.7578125, + "entropy/mean": 0.455078125, + "entropy/min": 0.306640625, + "epoch": 0.989, + "grad_norm": 1.1880257355559192, + "kl": 0.275390625, + "learning_rate": 6.091729809042379e-10, + "loss": 0.0027825646102428436, + "memory(GiB)": 137.41, + "reward": 1.5465867519378662, + "reward_std": 0.11989744752645493, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.389413446187973, + "rewards/EvidenceHallucination/std": 0.46374890208244324, + "rewards/Evidence_Num_Record/mean": 4.404761791229248, + "rewards/Evidence_Num_Record/std": 1.0135550498962402, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.43537071347236633, + "rewards/VideoAccuracy/std": 0.4611511826515198, + "step": 989, + "train_speed(iter/s)": 0.062084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 705.0, + "completions/mean_length": 466.4285888671875, + "completions/min_length": 337.0, + "entropy/max": 0.65625, + "entropy/mean": 0.443359375, + "entropy/min": 0.296875, + "epoch": 0.99, + "grad_norm": 1.16863477938996, + "kl": 0.283203125, + "learning_rate": 5.034576168149174e-10, + "loss": 0.002865845337510109, + "memory(GiB)": 137.41, + "reward": 1.6263631582260132, + "reward_std": 0.3727450370788574, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3417263925075531, + "rewards/EvidenceHallucination/std": 0.42671677470207214, + "rewards/Evidence_Num_Record/mean": 4.261904716491699, + "rewards/Evidence_Num_Record/std": 1.3262733221054077, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.5246845483779907, + "rewards/VideoAccuracy/std": 0.5592222213745117, + "step": 990, + "train_speed(iter/s)": 0.06194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 774.0, + "completions/mean_length": 513.5952758789062, + "completions/min_length": 353.0, + "entropy/max": 0.470703125, + "entropy/mean": 0.25390625, + "entropy/min": 0.115234375, + "epoch": 0.991, + "grad_norm": 1.1156864175554875, + "kl": 0.2265625, + "learning_rate": 4.078071718107701e-10, + "loss": 0.002295741345733404, + "memory(GiB)": 137.41, + "reward": 2.361910820007324, + "reward_std": 0.0892949029803276, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6723664999008179, + "rewards/EvidenceHallucination/std": 0.4161950349807739, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.594203531742096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 1.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 1.027437448501587, + "rewards/VideoAccuracy/std": 0.38673079013824463, + "step": 991, + "train_speed(iter/s)": 0.06179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 767.0, + "completions/mean_length": 482.26190185546875, + "completions/min_length": 282.0, + "entropy/max": 0.9140625, + "entropy/mean": 0.4609375, + "entropy/min": 0.267578125, + "epoch": 0.992, + "grad_norm": 1.443864793433425, + "kl": 0.255859375, + "learning_rate": 3.2222260909087194e-10, + "loss": 0.002598737133666873, + "memory(GiB)": 137.41, + "reward": 1.8899970054626465, + "reward_std": 0.306724488735199, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6235135793685913, + "rewards/EvidenceHallucination/std": 0.40866008400917053, + "rewards/Evidence_Num_Record/mean": 4.61904764175415, + "rewards/Evidence_Num_Record/std": 1.4971905946731567, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1666666716337204, + "rewards/HonestTime/std": 0.37719547748565674, + "rewards/VideoAccuracy/mean": 0.7319609522819519, + "rewards/VideoAccuracy/std": 0.4215676784515381, + "step": 992, + "train_speed(iter/s)": 0.061501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 814.0, + "completions/mean_length": 460.9285888671875, + "completions/min_length": 304.0, + "entropy/max": 0.83984375, + "entropy/mean": 0.47265625, + "entropy/min": 0.265625, + "epoch": 0.993, + "grad_norm": 1.2905090614888646, + "kl": 0.251953125, + "learning_rate": 2.4670479049082594e-10, + "loss": 0.002519114874303341, + "memory(GiB)": 137.41, + "reward": 1.3910771608352661, + "reward_std": 0.3596101999282837, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.29593271017074585, + "rewards/EvidenceHallucination/std": 0.43152523040771484, + "rewards/Evidence_Num_Record/mean": 3.9761905670166016, + "rewards/Evidence_Num_Record/std": 0.9236221313476562, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.33189061284065247, + "rewards/VideoAccuracy/std": 0.47508248686790466, + "step": 993, + "train_speed(iter/s)": 0.061359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 657.0, + "completions/mean_length": 453.8333435058594, + "completions/min_length": 339.0, + "entropy/max": 0.6171875, + "entropy/mean": 0.373046875, + "entropy/min": 0.1611328125, + "epoch": 0.994, + "grad_norm": 1.1471652869518276, + "kl": 0.267578125, + "learning_rate": 1.81254476474213e-10, + "loss": 0.0026834774762392044, + "memory(GiB)": 137.41, + "reward": 2.1440560817718506, + "reward_std": 0.13914991915225983, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.6713060736656189, + "rewards/EvidenceHallucination/std": 0.3882835805416107, + "rewards/Evidence_Num_Record/mean": 3.5238096714019775, + "rewards/Evidence_Num_Record/std": 0.8333913683891296, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.8095238208770752, + "rewards/HonestTime/std": 0.39743661880493164, + "rewards/VideoAccuracy/mean": 0.8478900790214539, + "rewards/VideoAccuracy/std": 0.42282700538635254, + "step": 994, + "train_speed(iter/s)": 0.061239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 839.0, + "completions/mean_length": 507.3809509277344, + "completions/min_length": 355.0, + "entropy/max": 0.85546875, + "entropy/mean": 0.41796875, + "entropy/min": 0.1572265625, + "epoch": 0.995, + "grad_norm": 1.2029400801179002, + "kl": 0.2451171875, + "learning_rate": 1.258723261249317e-10, + "loss": 0.0024793946649879217, + "memory(GiB)": 137.41, + "reward": 2.1752610206604004, + "reward_std": 0.13766251504421234, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.8124963641166687, + "rewards/EvidenceHallucination/std": 0.31519562005996704, + "rewards/Evidence_Num_Record/mean": 3.7142858505249023, + "rewards/Evidence_Num_Record/std": 0.7419721484184265, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.3333333432674408, + "rewards/HonestTime/std": 0.47711876034736633, + "rewards/VideoAccuracy/mean": 0.9460952281951904, + "rewards/VideoAccuracy/std": 0.3199745714664459, + "step": 995, + "train_speed(iter/s)": 0.061039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 736.0, + "completions/mean_length": 471.26190185546875, + "completions/min_length": 349.0, + "entropy/max": 0.65625, + "entropy/mean": 0.4453125, + "entropy/min": 0.275390625, + "epoch": 0.996, + "grad_norm": 0.9239728844892342, + "kl": 0.259765625, + "learning_rate": 8.055889714064789e-11, + "loss": 0.0026135165244340897, + "memory(GiB)": 137.41, + "reward": 1.479562759399414, + "reward_std": 0.18091247975826263, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.3740043044090271, + "rewards/EvidenceHallucination/std": 0.4622194170951843, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.1243788003921509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.4047619104385376, + "rewards/VideoAccuracy/std": 0.49679574370384216, + "step": 996, + "train_speed(iter/s)": 0.060871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 665.0, + "completions/mean_length": 452.0238037109375, + "completions/min_length": 324.0, + "entropy/max": 0.57421875, + "entropy/mean": 0.43359375, + "entropy/min": 0.29296875, + "epoch": 0.997, + "grad_norm": 1.3533168229363475, + "kl": 0.28125, + "learning_rate": 4.5314645827132516e-11, + "loss": 0.0028381943702697754, + "memory(GiB)": 137.41, + "reward": 2.030975103378296, + "reward_std": 0.26551011204719543, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5573198795318604, + "rewards/EvidenceHallucination/std": 0.44968104362487793, + "rewards/Evidence_Num_Record/mean": 3.547619104385376, + "rewards/Evidence_Num_Record/std": 0.8323455452919006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.4761904776096344, + "rewards/HonestTime/std": 0.5054867267608643, + "rewards/VideoAccuracy/mean": 0.8242730498313904, + "rewards/VideoAccuracy/std": 0.5885381698608398, + "step": 997, + "train_speed(iter/s)": 0.060753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 842.0, + "completions/mean_length": 598.6428833007812, + "completions/min_length": 395.0, + "entropy/max": 0.97265625, + "entropy/mean": 0.3359375, + "entropy/min": 0.1318359375, + "epoch": 0.998, + "grad_norm": 1.034021068336648, + "kl": 0.20703125, + "learning_rate": 2.0139927093487663e-11, + "loss": 0.002108077285811305, + "memory(GiB)": 137.41, + "reward": 2.166386127471924, + "reward_std": 0.21136659383773804, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.7145857214927673, + "rewards/EvidenceHallucination/std": 0.3231663107872009, + "rewards/Evidence_Num_Record/mean": 4.309524059295654, + "rewards/Evidence_Num_Record/std": 1.0704021453857422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.6190476417541504, + "rewards/HonestTime/std": 0.4915074408054352, + "rewards/VideoAccuracy/mean": 0.8996596932411194, + "rewards/VideoAccuracy/std": 0.3587428331375122, + "step": 998, + "train_speed(iter/s)": 0.060568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 812.0, + "completions/mean_length": 470.5476379394531, + "completions/min_length": 328.0, + "entropy/max": 0.67578125, + "entropy/mean": 0.41796875, + "entropy/min": 0.251953125, + "epoch": 0.999, + "grad_norm": 1.3317680302128188, + "kl": 0.271484375, + "learning_rate": 5.034994448926966e-12, + "loss": 0.002765919780358672, + "memory(GiB)": 137.41, + "reward": 1.734266757965088, + "reward_std": 0.38671621680259705, + "rewards/EvidenceFormat/mean": 1.0, + "rewards/EvidenceFormat/std": 0.0, + "rewards/EvidenceHallucination/mean": 0.5760961174964905, + "rewards/EvidenceHallucination/std": 0.4836040437221527, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 1.528855323791504, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.0, + "rewards/HonestTime/std": 0.0, + "rewards/VideoAccuracy/mean": 0.6190476417541504, + "rewards/VideoAccuracy/std": 0.4915074408054352, + "step": 999, + "train_speed(iter/s)": 0.060436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1036.0, + "completions/mean_length": 504.5238037109375, + "completions/min_length": 274.0, + "entropy/max": 0.80859375, + "entropy/mean": 0.4609375, + "entropy/min": 0.251953125, + "epoch": 1.0, + "grad_norm": 1.28633537826959, + "kl": 0.2421875, + "learning_rate": 0.0, + "loss": 0.0024761264212429523, + "memory(GiB)": 137.41, + "reward": 1.4781855344772339, + "reward_std": 0.22372941672801971, + "rewards/EvidenceFormat/mean": 0.9761905074119568, + "rewards/EvidenceFormat/std": 0.15430335700511932, + "rewards/EvidenceHallucination/mean": 0.3453802764415741, + "rewards/EvidenceHallucination/std": 0.4514380693435669, + "rewards/Evidence_Num_Record/mean": 4.166666507720947, + "rewards/Evidence_Num_Record/std": 2.8190698623657227, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "rewards/HonestTime/mean": 0.1428571492433548, + "rewards/HonestTime/std": 0.3541688024997711, + "rewards/VideoAccuracy/mean": 0.3924427628517151, + "rewards/VideoAccuracy/std": 0.49488815665245056, + "step": 1000, + "train_speed(iter/s)": 0.06029 + } + ], + "logging_steps": 1, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 7, + "trial_name": null, + "trial_params": null +}