diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7634 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18198362147406733, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0625, + "TT_Grounding/mode_1": 0.3125, + "TT_Math/mode_0": 0.4605263157894737, + "TT_Math/mode_1": 0.4605263157894737, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 377.5625, + "completion_length/mode_0": 383.5234375, + "completion_length/mode_1": 371.6015625, + "epoch": 0.0009099181073703367, + "format_confidence": 0.5, + "grad_norm": 1.5429338116205915, + "grounded_proportion": 0.5, + "kl": 0.0, + "learning_rate": 1e-06, + "loss": 0.0, + "over_lengthy_sequences": 0.00390625, + "reward": 1.3203125, + "reward_std": 0.33722585439682007, + "rewards/format_reward": 0.9765625, + "rewards/general_task_reward": 0.34375, + "step": 1 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5833333333333334, + "TT_Counting/mode_0": 0.0625, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.40384615384615385, + "TT_Math/mode_1": 0.38461538461538464, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.4375, + "completion_length": 304.2890625, + "completion_length/mode_0": 337.1171875, + "completion_length/mode_1": 271.4609375, + "epoch": 0.0018198362147406734, + "format_confidence": 0.5, + "grad_norm": 1.1197813674763613, + "grounded_proportion": 0.5, + "kl": 0.000415802001953125, + "learning_rate": 9.990900818926296e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.0, + "reward": 1.375, + "reward_std": 0.2749903202056885, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.375, + "step": 2 + }, + { + "TT_Chart/mode_0": 0.9166666666666666, + "TT_Chart/mode_1": 0.9166666666666666, + "TT_Counting/mode_0": 0.0625, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.625, + "TT_Math/mode_1": 0.5340909090909091, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.375, + "completion_length": 367.0625, + "completion_length/mode_0": 395.9765625, + "completion_length/mode_1": 338.1484375, + "epoch": 0.00272975432211101, + "format_confidence": 0.5, + "grad_norm": 2.0139122368418265, + "grounded_proportion": 0.5, + "kl": 0.0003833770751953125, + "learning_rate": 9.981801637852592e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.0, + "reward": 1.5078125, + "reward_std": 0.33350804448127747, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.515625, + "step": 3 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.703125, + "TT_Math/mode_1": 0.59375, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.375, + "completion_length": 311.7890625, + "completion_length/mode_0": 331.875, + "completion_length/mode_1": 291.703125, + "epoch": 0.003639672429481347, + "format_confidence": 0.5, + "grad_norm": 0.9407069347152167, + "grounded_proportion": 0.5, + "kl": 0.0003643035888671875, + "learning_rate": 9.97270245677889e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.0, + "reward": 1.3828125, + "reward_std": 0.27103859186172485, + "rewards/format_reward": 0.98046875, + "rewards/general_task_reward": 0.40234375, + "step": 4 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.5625, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.36764705882352944, + "TT_Math/mode_1": 0.2647058823529412, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.375, + "completion_length": 366.9765625, + "completion_length/mode_0": 407.4140625, + "completion_length/mode_1": 326.5390625, + "epoch": 0.004549590536851683, + "format_confidence": 0.5, + "grad_norm": 0.9635797377965721, + "grounded_proportion": 0.5, + "kl": 0.00054931640625, + "learning_rate": 9.963603275705185e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.00390625, + "reward": 1.32421875, + "reward_std": 0.26956743001937866, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.33203125, + "step": 5 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.3333333333333333, + "TT_Document/mode_1": 0.08333333333333333, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4, + "TT_Math/mode_1": 0.48333333333333334, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.75, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.5, + "completion_length": 322.3125, + "completion_length/mode_0": 345.171875, + "completion_length/mode_1": 299.453125, + "epoch": 0.00545950864422202, + "format_confidence": 0.5, + "grad_norm": 1.073086357937483, + "grounded_proportion": 0.5, + "kl": 0.000858306884765625, + "learning_rate": 9.954504094631483e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.0, + "reward": 1.3046875, + "reward_std": 0.3800785541534424, + "rewards/format_reward": 0.98046875, + "rewards/general_task_reward": 0.32421875, + "step": 6 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5588235294117647, + "TT_Math/mode_1": 0.4264705882352941, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.25, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.625, + "TT_Science/mode_1": 0.25, + "completion_length": 292.5859375, + "completion_length/mode_0": 318.9609375, + "completion_length/mode_1": 266.2109375, + "epoch": 0.006369426751592357, + "format_confidence": 0.5, + "grad_norm": 0.8168275363403603, + "grounded_proportion": 0.5, + "kl": 0.0014495849609375, + "learning_rate": 9.94540491355778e-07, + "loss": 0.0001, + "over_lengthy_sequences": 0.0, + "reward": 1.40625, + "reward_std": 0.32825323939323425, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40625, + "step": 7 + }, + { + "TT_Chart/mode_0": 0.15, + "TT_Chart/mode_1": 0.15, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.16666666666666666, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.65, + "TT_Math/mode_1": 0.525, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0625, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.375, + "completion_length": 350.88671875, + "completion_length/mode_0": 379.875, + "completion_length/mode_1": 321.8984375, + "epoch": 0.007279344858962694, + "format_confidence": 0.5, + "grad_norm": 0.9037954457959693, + "grounded_proportion": 0.5, + "kl": 0.000934600830078125, + "learning_rate": 9.936305732484076e-07, + "loss": 0.0, + "over_lengthy_sequences": 0.0, + "reward": 1.34375, + "reward_std": 0.3212430477142334, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.35546875, + "step": 8 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.5454545454545454, + "TT_Math/mode_1": 0.5227272727272727, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0625, + "TT_Science/mode_1": 0.375, + "completion_length": 373.3203125, + "completion_length/mode_0": 413.8046875, + "completion_length/mode_1": 332.8359375, + "epoch": 0.00818926296633303, + "format_confidence": 0.5, + "grad_norm": 1.5911208325130124, + "grounded_proportion": 0.5, + "kl": 0.001251220703125, + "learning_rate": 9.927206551410372e-07, + "loss": 0.0001, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.3262837529182434, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.43359375, + "step": 9 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.5833333333333334, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5833333333333334, + "TT_Math/mode_0": 0.5357142857142857, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0625, + "TT_Science/mode_1": 0.375, + "completion_length": 372.32421875, + "completion_length/mode_0": 402.265625, + "completion_length/mode_1": 342.3828125, + "epoch": 0.009099181073703366, + "format_confidence": 0.5, + "grad_norm": 0.9600405491472309, + "grounded_proportion": 0.5, + "kl": 0.0033721923828125, + "learning_rate": 9.918107370336669e-07, + "loss": 0.0001, + "over_lengthy_sequences": 0.0, + "reward": 1.4375, + "reward_std": 0.270779013633728, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4375, + "step": 10 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.375, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.47368421052631576, + "TT_Math/mode_1": 0.39473684210526316, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.875, + "TT_Science/mode_1": 0.625, + "completion_length": 387.62109375, + "completion_length/mode_0": 418.53125, + "completion_length/mode_1": 356.7109375, + "epoch": 0.010009099181073703, + "format_confidence": 0.5, + "grad_norm": 0.8976670020100668, + "grounded_proportion": 0.5, + "kl": 0.0032501220703125, + "learning_rate": 9.909008189262967e-07, + "loss": 0.0001, + "over_lengthy_sequences": 0.0, + "reward": 1.40625, + "reward_std": 0.24920988082885742, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40625, + "step": 11 + }, + { + "TT_Chart/mode_0": 0.15, + "TT_Chart/mode_1": 0.3, + "TT_Counting/mode_0": 0.1875, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.375, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.375, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.546875, + "TT_Math/mode_1": 0.484375, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.5, + "completion_length": 373.30859375, + "completion_length/mode_0": 404.21875, + "completion_length/mode_1": 342.3984375, + "epoch": 0.01091901728844404, + "format_confidence": 0.5, + "grad_norm": 0.7415423083376748, + "grounded_proportion": 0.5, + "kl": 0.006988525390625, + "learning_rate": 9.899909008189261e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.3828125, + "reward_std": 0.29287609457969666, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3828125, + "step": 12 + }, + { + "TT_Chart/mode_0": 0.3125, + "TT_Chart/mode_1": 0.4375, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4666666666666667, + "TT_Math/mode_1": 0.55, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 1.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.2, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 272.83984375, + "completion_length/mode_0": 305.1953125, + "completion_length/mode_1": 240.484375, + "epoch": 0.011828935395814377, + "format_confidence": 0.5, + "grad_norm": 1.416444491434725, + "grounded_proportion": 0.5, + "kl": 0.01019287109375, + "learning_rate": 9.89080982711556e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.2200184315443039, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.37109375, + "step": 13 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.875, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.08333333333333333, + "TT_Math/mode_0": 0.5166666666666667, + "TT_Math/mode_1": 0.43333333333333335, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 334.30859375, + "completion_length/mode_0": 376.7578125, + "completion_length/mode_1": 291.859375, + "epoch": 0.012738853503184714, + "format_confidence": 0.5, + "grad_norm": 1.2195596640953166, + "grounded_proportion": 0.5, + "kl": 0.0172119140625, + "learning_rate": 9.881710646041856e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.33984375, + "reward_std": 0.25836920738220215, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.34375, + "step": 14 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0625, + "TT_Document/mode_1": 0.1875, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.5454545454545454, + "TT_Math/mode_1": 0.4431818181818182, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 350.69921875, + "completion_length/mode_0": 371.78125, + "completion_length/mode_1": 329.6171875, + "epoch": 0.01364877161055505, + "format_confidence": 0.5, + "grad_norm": 0.9960620613889194, + "grounded_proportion": 0.5, + "kl": 0.025146484375, + "learning_rate": 9.872611464968153e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.00390625, + "reward": 1.39453125, + "reward_std": 0.29446732997894287, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3984375, + "step": 15 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.15, + "TT_Grounding/mode_1": 0.35, + "TT_Math/mode_0": 0.5294117647058824, + "TT_Math/mode_1": 0.5147058823529411, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 351.96484375, + "completion_length/mode_0": 384.609375, + "completion_length/mode_1": 319.3203125, + "epoch": 0.014558689717925387, + "format_confidence": 0.5, + "grad_norm": 0.9260258786381415, + "grounded_proportion": 0.5, + "kl": 0.0201416015625, + "learning_rate": 9.863512283894449e-07, + "loss": 0.0008, + "over_lengthy_sequences": 0.0, + "reward": 1.3359375, + "reward_std": 0.19503436982631683, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3359375, + "step": 16 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.45, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.47619047619047616, + "TT_Math/mode_1": 0.42857142857142855, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.25, + "completion_length": 368.91796875, + "completion_length/mode_0": 401.9609375, + "completion_length/mode_1": 335.875, + "epoch": 0.015468607825295723, + "format_confidence": 0.5, + "grad_norm": 0.8885940593044405, + "grounded_proportion": 0.5, + "kl": 0.007049560546875, + "learning_rate": 9.854413102820745e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.2695994973182678, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.390625, + "step": 17 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.16666666666666666, + "TT_Counting/mode_1": 0.16666666666666666, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.16666666666666666, + "TT_Document/mode_1": 0.16666666666666666, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.5384615384615384, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.75, + "TT_Science/mode_0": 0.625, + "TT_Science/mode_1": 0.75, + "completion_length": 266.00390625, + "completion_length/mode_0": 279.5390625, + "completion_length/mode_1": 252.46875, + "epoch": 0.01637852593266606, + "format_confidence": 0.5, + "grad_norm": 1.4954519102348234, + "grounded_proportion": 0.5, + "kl": 0.00970458984375, + "learning_rate": 9.845313921747044e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.30023884773254395, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.37109375, + "step": 18 + }, + { + "TT_Chart/mode_0": 0.3333333333333333, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.375, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5192307692307693, + "TT_Math/mode_1": 0.4423076923076923, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 305.93359375, + "completion_length/mode_0": 336.4140625, + "completion_length/mode_1": 275.453125, + "epoch": 0.017288444040036398, + "format_confidence": 0.5, + "grad_norm": 1.2648060156416405, + "grounded_proportion": 0.5, + "kl": 0.00762939453125, + "learning_rate": 9.836214740673338e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.3359375, + "reward_std": 0.22764958441257477, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3359375, + "step": 19 + }, + { + "TT_Chart/mode_0": 0.3333333333333333, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.8333333333333334, + "TT_Document/mode_1": 0.16666666666666666, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.4, + "TT_Math/mode_0": 0.45588235294117646, + "TT_Math/mode_1": 0.4117647058823529, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.375, + "completion_length": 350.97265625, + "completion_length/mode_0": 366.796875, + "completion_length/mode_1": 335.1484375, + "epoch": 0.018198362147406732, + "format_confidence": 0.5, + "grad_norm": 2.2680753597810743, + "grounded_proportion": 0.5, + "kl": 0.01123046875, + "learning_rate": 9.827115559599636e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.00390625, + "reward": 1.3515625, + "reward_std": 0.35683149099349976, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.35546875, + "step": 20 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.35, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.55, + "TT_Math/mode_1": 0.4666666666666667, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 300.5703125, + "completion_length/mode_0": 328.75, + "completion_length/mode_1": 272.390625, + "epoch": 0.01910828025477707, + "format_confidence": 0.5, + "grad_norm": 1.1844820805839822, + "grounded_proportion": 0.5, + "kl": 0.0118408203125, + "learning_rate": 9.818016378525933e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.3515625, + "reward_std": 0.27275240421295166, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3515625, + "step": 21 + }, + { + "TT_Chart/mode_0": 0.20833333333333334, + "TT_Chart/mode_1": 0.20833333333333334, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.45, + "TT_Math/mode_1": 0.45, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.4166666666666667, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 331.59765625, + "completion_length/mode_0": 350.2890625, + "completion_length/mode_1": 312.90625, + "epoch": 0.020018198362147407, + "format_confidence": 0.5, + "grad_norm": 0.8541575651698342, + "grounded_proportion": 0.5, + "kl": 0.010986328125, + "learning_rate": 9.80891719745223e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.30078125, + "reward_std": 0.2321278154850006, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.30078125, + "step": 22 + }, + { + "TT_Chart/mode_0": 0.5833333333333334, + "TT_Chart/mode_1": 0.4166666666666667, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.48863636363636365, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.6666666666666666, + "TT_Science/mode_1": 0.5, + "completion_length": 387.9765625, + "completion_length/mode_0": 414.9453125, + "completion_length/mode_1": 361.0078125, + "epoch": 0.020928116469517744, + "format_confidence": 0.5, + "grad_norm": 1.5808055185099863, + "grounded_proportion": 0.5, + "kl": 0.006439208984375, + "learning_rate": 9.799818016378525e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.46484375, + "reward_std": 0.32852408289909363, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.46484375, + "step": 23 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.453125, + "TT_Math/mode_1": 0.515625, + "TT_OCR/mode_0": 0.5625, + "TT_OCR/mode_1": 0.6875, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 331.4140625, + "completion_length/mode_0": 352.4453125, + "completion_length/mode_1": 310.3828125, + "epoch": 0.02183803457688808, + "format_confidence": 0.5, + "grad_norm": 0.8593245043140431, + "grounded_proportion": 0.5, + "kl": 0.01055908203125, + "learning_rate": 9.790718835304822e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.24446570873260498, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3671875, + "step": 24 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.3125, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.484375, + "TT_OCR/mode_0": 0.5625, + "TT_OCR/mode_1": 0.6875, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.1875, + "completion_length": 346.8046875, + "completion_length/mode_0": 340.640625, + "completion_length/mode_1": 352.96875, + "epoch": 0.022747952684258416, + "format_confidence": 0.5, + "grad_norm": 0.9872564452072786, + "grounded_proportion": 0.5, + "kl": 0.00830078125, + "learning_rate": 9.78161965423112e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0078125, + "reward": 1.3359375, + "reward_std": 0.3047879636287689, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.34375, + "step": 25 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.6666666666666666, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4722222222222222, + "TT_Math/mode_1": 0.3611111111111111, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.3125, + "completion_length": 370.4453125, + "completion_length/mode_0": 427.0, + "completion_length/mode_1": 313.890625, + "epoch": 0.023657870791628753, + "format_confidence": 0.5, + "grad_norm": 1.0075122997992207, + "grounded_proportion": 0.5, + "kl": 0.0067138671875, + "learning_rate": 9.772520473157414e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.34375, + "reward_std": 0.301014244556427, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.34765625, + "step": 26 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.453125, + "TT_Math/mode_1": 0.46875, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.05, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 331.84375, + "completion_length/mode_0": 353.1015625, + "completion_length/mode_1": 310.5859375, + "epoch": 0.02456778889899909, + "format_confidence": 0.5, + "grad_norm": 0.9025821168695539, + "grounded_proportion": 0.5, + "kl": 0.0146484375, + "learning_rate": 9.763421292083713e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.28125, + "reward_std": 0.2187202274799347, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.28125, + "step": 27 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.3125, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.6666666666666666, + "TT_Math/mode_0": 0.3333333333333333, + "TT_Math/mode_1": 0.2777777777777778, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 390.828125, + "completion_length/mode_0": 404.609375, + "completion_length/mode_1": 377.046875, + "epoch": 0.025477707006369428, + "format_confidence": 0.5, + "grad_norm": 1.1882479091934361, + "grounded_proportion": 0.5, + "kl": 0.021728515625, + "learning_rate": 9.75432211101001e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.29296875, + "reward_std": 0.25501734018325806, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.29296875, + "step": 28 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.16666666666666666, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.125, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5138888888888888, + "TT_Math/mode_1": 0.4583333333333333, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 455.8359375, + "completion_length/mode_0": 462.328125, + "completion_length/mode_1": 449.34375, + "epoch": 0.026387625113739762, + "format_confidence": 0.5, + "grad_norm": 0.6446278395164787, + "grounded_proportion": 0.5, + "kl": 0.005828857421875, + "learning_rate": 9.745222929936306e-07, + "loss": 0.0002, + "over_lengthy_sequences": 0.0078125, + "reward": 1.3125, + "reward_std": 0.22071683406829834, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.3203125, + "step": 29 + }, + { + "TT_Chart/mode_0": 0.4166666666666667, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.125, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.4090909090909091, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.125, + "completion_length": 432.3046875, + "completion_length/mode_0": 476.96875, + "completion_length/mode_1": 387.640625, + "epoch": 0.0272975432211101, + "format_confidence": 0.5, + "grad_norm": 0.7387045634169115, + "grounded_proportion": 0.5, + "kl": 0.01116943359375, + "learning_rate": 9.736123748862602e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.3345615267753601, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.38671875, + "step": 30 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.125, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5125, + "TT_Math/mode_1": 0.525, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.75, + "completion_length": 387.2578125, + "completion_length/mode_0": 424.34375, + "completion_length/mode_1": 350.171875, + "epoch": 0.028207461328480437, + "format_confidence": 0.5, + "grad_norm": 0.9075215710759794, + "grounded_proportion": 0.5, + "kl": 0.0091552734375, + "learning_rate": 9.727024567788898e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.40234375, + "reward_std": 0.2678895890712738, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40234375, + "step": 31 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.16666666666666666, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5681818181818182, + "TT_Math/mode_1": 0.5227272727272727, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.0, + "completion_length": 367.25, + "completion_length/mode_0": 399.828125, + "completion_length/mode_1": 334.671875, + "epoch": 0.029117379435850774, + "format_confidence": 0.5, + "grad_norm": 0.7678387402875528, + "grounded_proportion": 0.5, + "kl": 0.01202392578125, + "learning_rate": 9.717925386715195e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.42578125, + "reward_std": 0.284709632396698, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.42578125, + "step": 32 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.42391304347826086, + "TT_Math/mode_1": 0.5434782608695652, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.0, + "completion_length": 412.55859375, + "completion_length/mode_0": 444.4921875, + "completion_length/mode_1": 380.625, + "epoch": 0.03002729754322111, + "format_confidence": 0.5, + "grad_norm": 1.076115993141789, + "grounded_proportion": 0.5, + "kl": 0.01397705078125, + "learning_rate": 9.70882620564149e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.304455041885376, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 33 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.3375, + "TT_Math/mode_1": 0.4, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 358.66015625, + "completion_length/mode_0": 385.1640625, + "completion_length/mode_1": 332.15625, + "epoch": 0.030937215650591446, + "format_confidence": 0.5, + "grad_norm": 1.2114231515737024, + "grounded_proportion": 0.5, + "kl": 0.0238037109375, + "learning_rate": 9.69972702456779e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.3359375, + "reward_std": 0.21515312790870667, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3359375, + "step": 34 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.08333333333333333, + "TT_Math/mode_0": 0.5694444444444444, + "TT_Math/mode_1": 0.4861111111111111, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 339.53125, + "completion_length/mode_0": 356.71875, + "completion_length/mode_1": 322.34375, + "epoch": 0.03184713375796178, + "format_confidence": 0.5, + "grad_norm": 0.9974240246736068, + "grounded_proportion": 0.5, + "kl": 0.026123046875, + "learning_rate": 9.690627843494086e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.00390625, + "reward": 1.3359375, + "reward_std": 0.28288590908050537, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.33984375, + "step": 35 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.05, + "TT_Detection/mode_1": 0.55, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.6333333333333333, + "TT_Math/mode_1": 0.4166666666666667, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.25, + "completion_length": 334.58203125, + "completion_length/mode_0": 380.921875, + "completion_length/mode_1": 288.2421875, + "epoch": 0.03275705186533212, + "format_confidence": 0.5, + "grad_norm": 1.6406753400760954, + "grounded_proportion": 0.5, + "kl": 0.03759765625, + "learning_rate": 9.681528662420382e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.31667181849479675, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.38671875, + "step": 36 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5543478260869565, + "TT_Math/mode_1": 0.5652173913043478, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.08333333333333333, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 348.41015625, + "completion_length/mode_0": 370.5234375, + "completion_length/mode_1": 326.296875, + "epoch": 0.03366696997270246, + "format_confidence": 0.5, + "grad_norm": 0.6026535973402165, + "grounded_proportion": 0.5, + "kl": 0.0167236328125, + "learning_rate": 9.672429481346678e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.4453125, + "reward_std": 0.24579495191574097, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4453125, + "step": 37 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4875, + "TT_Math/mode_1": 0.6, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.0, + "completion_length": 325.0859375, + "completion_length/mode_0": 345.7109375, + "completion_length/mode_1": 304.4609375, + "epoch": 0.034576888080072796, + "format_confidence": 0.5, + "grad_norm": 1.7996536274917565, + "grounded_proportion": 0.5, + "kl": 0.039794921875, + "learning_rate": 9.663330300272975e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.2690715491771698, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 38 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.05, + "TT_Grounding/mode_1": 0.2, + "TT_Math/mode_0": 0.5277777777777778, + "TT_Math/mode_1": 0.4305555555555556, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.0, + "completion_length": 337.09375, + "completion_length/mode_0": 384.3984375, + "completion_length/mode_1": 289.7890625, + "epoch": 0.03548680618744313, + "format_confidence": 0.5, + "grad_norm": 1.8348507814517516, + "grounded_proportion": 0.5, + "kl": 0.051513671875, + "learning_rate": 9.65423111919927e-07, + "loss": 0.0021, + "over_lengthy_sequences": 0.00390625, + "reward": 1.375, + "reward_std": 0.29592859745025635, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.37890625, + "step": 39 + }, + { + "TT_Chart/mode_0": 0.4166666666666667, + "TT_Chart/mode_1": 0.3333333333333333, + "TT_Counting/mode_0": 0.4, + "TT_Counting/mode_1": 0.2, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.05, + "TT_Grounding/mode_1": 0.2, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.375, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.0, + "completion_length": 423.27734375, + "completion_length/mode_0": 478.3359375, + "completion_length/mode_1": 368.21875, + "epoch": 0.036396724294813464, + "format_confidence": 0.5, + "grad_norm": 0.8417782233647189, + "grounded_proportion": 0.5, + "kl": 0.0096435546875, + "learning_rate": 9.645131938125567e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.4140625, + "reward_std": 0.3811083436012268, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4140625, + "step": 40 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.5833333333333334, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.6333333333333333, + "TT_Math/mode_1": 0.6, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.625, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 346.296875, + "completion_length/mode_0": 368.3125, + "completion_length/mode_1": 324.28125, + "epoch": 0.0373066424021838, + "format_confidence": 0.5, + "grad_norm": 0.8971248354232603, + "grounded_proportion": 0.5, + "kl": 0.01708984375, + "learning_rate": 9.636032757051866e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.2794685363769531, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 41 + }, + { + "TT_Chart/mode_0": 0.39285714285714285, + "TT_Chart/mode_1": 0.32142857142857145, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5192307692307693, + "TT_Math/mode_1": 0.5192307692307693, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.4166666666666667, + "completion_length": 327.87890625, + "completion_length/mode_0": 374.3984375, + "completion_length/mode_1": 281.359375, + "epoch": 0.03821656050955414, + "format_confidence": 0.5, + "grad_norm": 1.969070754091683, + "grounded_proportion": 0.5, + "kl": 0.0157470703125, + "learning_rate": 9.626933575978162e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.375, + "reward_std": 0.2452620565891266, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.375, + "step": 42 + }, + { + "TT_Chart/mode_0": 0.2916666666666667, + "TT_Chart/mode_1": 0.2916666666666667, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.6666666666666666, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5833333333333334, + "TT_Math/mode_1": 0.5208333333333334, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.75, + "completion_length": 279.71875, + "completion_length/mode_0": 307.2421875, + "completion_length/mode_1": 252.1953125, + "epoch": 0.039126478616924476, + "format_confidence": 0.5, + "grad_norm": 3.84523994130464, + "grounded_proportion": 0.5, + "kl": 0.0184326171875, + "learning_rate": 9.617834394904458e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.390625, + "reward_std": 0.2264637053012848, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.390625, + "step": 43 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.5588235294117647, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.375, + "completion_length": 324.25390625, + "completion_length/mode_0": 361.8515625, + "completion_length/mode_1": 286.65625, + "epoch": 0.040036396724294813, + "format_confidence": 0.5, + "grad_norm": 0.9447344472383061, + "grounded_proportion": 0.5, + "kl": 0.01422119140625, + "learning_rate": 9.608735213830755e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.39453125, + "reward_std": 0.29340648651123047, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.39453125, + "step": 44 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.875, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4473684210526316, + "TT_Math/mode_1": 0.39473684210526316, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 1.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.25, + "completion_length": 337.859375, + "completion_length/mode_0": 363.390625, + "completion_length/mode_1": 312.328125, + "epoch": 0.04094631483166515, + "format_confidence": 0.5, + "grad_norm": 1.4320135714153, + "grounded_proportion": 0.5, + "kl": 0.013671875, + "learning_rate": 9.599636032757051e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.375, + "reward_std": 0.31154942512512207, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.37890625, + "step": 45 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5625, + "TT_Counting/mode_1": 0.4375, + "TT_Detection/mode_0": 1.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.3815789473684211, + "TT_Math/mode_1": 0.2894736842105263, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 380.5, + "completion_length/mode_0": 418.28125, + "completion_length/mode_1": 342.71875, + "epoch": 0.04185623293903549, + "format_confidence": 0.5, + "grad_norm": 0.7559008917052512, + "grounded_proportion": 0.5, + "kl": 0.012939453125, + "learning_rate": 9.590536851683348e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.37890625, + "reward_std": 0.23144766688346863, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37890625, + "step": 46 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5588235294117647, + "TT_Math/mode_1": 0.45588235294117646, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.2, + "TT_Science/mode_1": 0.2, + "completion_length": 399.77734375, + "completion_length/mode_0": 433.140625, + "completion_length/mode_1": 366.4140625, + "epoch": 0.042766151046405826, + "format_confidence": 0.5, + "grad_norm": 1.0624054559364031, + "grounded_proportion": 0.5, + "kl": 0.01416015625, + "learning_rate": 9.581437670609644e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.4140625, + "reward_std": 0.35639268159866333, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4140625, + "step": 47 + }, + { + "TT_Chart/mode_0": 0.875, + "TT_Chart/mode_1": 0.875, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.46875, + "TT_OCR/mode_0": 0.3333333333333333, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.4166666666666667, + "completion_length": 349.78125, + "completion_length/mode_0": 375.375, + "completion_length/mode_1": 324.1875, + "epoch": 0.04367606915377616, + "format_confidence": 0.5, + "grad_norm": 0.6132246625343835, + "grounded_proportion": 0.5, + "kl": 0.011474609375, + "learning_rate": 9.572338489535942e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.2500086724758148, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.38671875, + "step": 48 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.875, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.39473684210526316, + "TT_Math/mode_1": 0.4868421052631579, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.4166666666666667, + "completion_length": 357.80078125, + "completion_length/mode_0": 382.140625, + "completion_length/mode_1": 333.4609375, + "epoch": 0.044585987261146494, + "format_confidence": 0.5, + "grad_norm": 1.3534089418671387, + "grounded_proportion": 0.5, + "kl": 0.0108642578125, + "learning_rate": 9.563239308462239e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.3424571454524994, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 49 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4722222222222222, + "TT_Math/mode_1": 0.4305555555555556, + "TT_OCR/mode_0": 0.08333333333333333, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.375, + "completion_length": 348.2421875, + "completion_length/mode_0": 378.8203125, + "completion_length/mode_1": 317.6640625, + "epoch": 0.04549590536851683, + "format_confidence": 0.5, + "grad_norm": 1.040116212408755, + "grounded_proportion": 0.5, + "kl": 0.0142822265625, + "learning_rate": 9.554140127388535e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.34375, + "reward_std": 0.3203405737876892, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.34765625, + "step": 50 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.16666666666666666, + "TT_Counting/mode_1": 0.5833333333333334, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.40789473684210525, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.75, + "completion_length": 356.33203125, + "completion_length/mode_0": 390.25, + "completion_length/mode_1": 322.4140625, + "epoch": 0.04640582347588717, + "format_confidence": 0.5, + "grad_norm": 2.6529744307875993, + "grounded_proportion": 0.5, + "kl": 0.01220703125, + "learning_rate": 9.545040946314831e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.40234375, + "reward_std": 0.3068116307258606, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40234375, + "step": 51 + }, + { + "TT_Chart/mode_0": 0.3125, + "TT_Chart/mode_1": 0.4375, + "TT_Counting/mode_0": 0.5833333333333334, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.125, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.671875, + "TT_Math/mode_1": 0.59375, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.75, + "completion_length": 329.06640625, + "completion_length/mode_0": 356.7421875, + "completion_length/mode_1": 301.390625, + "epoch": 0.047315741583257506, + "format_confidence": 0.5, + "grad_norm": 0.9611109881006651, + "grounded_proportion": 0.5, + "kl": 0.013916015625, + "learning_rate": 9.535941765241128e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.48828125, + "reward_std": 0.38452082872390747, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.48828125, + "step": 52 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.1875, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.578125, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.125, + "completion_length": 327.03125, + "completion_length/mode_0": 346.4921875, + "completion_length/mode_1": 307.5703125, + "epoch": 0.048225659690627844, + "format_confidence": 0.5, + "grad_norm": 0.797769852763845, + "grounded_proportion": 0.5, + "kl": 0.012939453125, + "learning_rate": 9.526842584167425e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.34375, + "reward_std": 0.21713145077228546, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.34375, + "step": 53 + }, + { + "TT_Chart/mode_0": 0.6, + "TT_Chart/mode_1": 0.45, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.578125, + "TT_Math/mode_1": 0.578125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.4166666666666667, + "TT_Science/mode_1": 0.5833333333333334, + "completion_length": 292.44140625, + "completion_length/mode_0": 315.625, + "completion_length/mode_1": 269.2578125, + "epoch": 0.04913557779799818, + "format_confidence": 0.5, + "grad_norm": 1.549225397699757, + "grounded_proportion": 0.5, + "kl": 0.01287841796875, + "learning_rate": 9.517743403093721e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.46875, + "reward_std": 0.20779038965702057, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.46875, + "step": 54 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.6052631578947368, + "TT_Math/mode_1": 0.631578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.25, + "completion_length": 336.27734375, + "completion_length/mode_0": 361.40625, + "completion_length/mode_1": 311.1484375, + "epoch": 0.05004549590536852, + "format_confidence": 0.5, + "grad_norm": 1.0145037617177997, + "grounded_proportion": 0.5, + "kl": 0.01055908203125, + "learning_rate": 9.508644222020018e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.44921875, + "reward_std": 0.27354732155799866, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.44921875, + "step": 55 + }, + { + "TT_Chart/mode_0": 0.3888888888888889, + "TT_Chart/mode_1": 0.3888888888888889, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.16666666666666666, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.7115384615384616, + "TT_Math/mode_1": 0.5961538461538461, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.25, + "completion_length": 319.97265625, + "completion_length/mode_0": 339.140625, + "completion_length/mode_1": 300.8046875, + "epoch": 0.050955414012738856, + "format_confidence": 0.5, + "grad_norm": 1.126724757554702, + "grounded_proportion": 0.5, + "kl": 0.01165771484375, + "learning_rate": 9.499545040946314e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.00390625, + "reward": 1.44140625, + "reward_std": 0.27130943536758423, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.4453125, + "step": 56 + }, + { + "TT_Chart/mode_0": 0.3333333333333333, + "TT_Chart/mode_1": 0.3333333333333333, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.875, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4264705882352941, + "TT_Math/mode_1": 0.4852941176470588, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.25, + "completion_length": 377.96875, + "completion_length/mode_0": 419.2265625, + "completion_length/mode_1": 336.7109375, + "epoch": 0.051865332120109194, + "format_confidence": 0.5, + "grad_norm": 0.7761878682566107, + "grounded_proportion": 0.5, + "kl": 0.012939453125, + "learning_rate": 9.490445859872611e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.33984375, + "reward_std": 0.19791889190673828, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.33984375, + "step": 57 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5131578947368421, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 354.40625, + "completion_length/mode_0": 378.8828125, + "completion_length/mode_1": 329.9296875, + "epoch": 0.052775250227479524, + "format_confidence": 0.5, + "grad_norm": 1.0626500338136928, + "grounded_proportion": 0.5, + "kl": 0.03759765625, + "learning_rate": 9.481346678798907e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.37109375, + "reward_std": 0.25118574500083923, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37109375, + "step": 58 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.46, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.25, + "completion_length": 438.22265625, + "completion_length/mode_0": 464.8359375, + "completion_length/mode_1": 411.609375, + "epoch": 0.05368516833484986, + "format_confidence": 0.5, + "grad_norm": 1.2595171994008831, + "grounded_proportion": 0.5, + "kl": 0.00750732421875, + "learning_rate": 9.472247497725204e-07, + "loss": 0.0003, + "over_lengthy_sequences": 0.0, + "reward": 1.4453125, + "reward_std": 0.26565414667129517, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4453125, + "step": 59 + }, + { + "TT_Chart/mode_0": 0.21428571428571427, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.40625, + "TT_Math/mode_1": 0.265625, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 421.61328125, + "completion_length/mode_0": 453.5625, + "completion_length/mode_1": 389.6640625, + "epoch": 0.0545950864422202, + "format_confidence": 0.5, + "grad_norm": 0.7843446949263764, + "grounded_proportion": 0.5, + "kl": 0.0103759765625, + "learning_rate": 9.463148316651502e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.2734375, + "reward_std": 0.24078628420829773, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.2734375, + "step": 60 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5568181818181818, + "TT_Math/mode_1": 0.5568181818181818, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 374.40625, + "completion_length/mode_0": 416.109375, + "completion_length/mode_1": 332.703125, + "epoch": 0.055505004549590536, + "format_confidence": 0.5, + "grad_norm": 0.49588424035326556, + "grounded_proportion": 0.5, + "kl": 0.00921630859375, + "learning_rate": 9.454049135577798e-07, + "loss": 0.0004, + "over_lengthy_sequences": 0.0, + "reward": 1.4140625, + "reward_std": 0.20528410375118256, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.41796875, + "step": 61 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.3125, + "TT_Math/mode_0": 0.4166666666666667, + "TT_Math/mode_1": 0.5694444444444444, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 370.55078125, + "completion_length/mode_0": 399.0546875, + "completion_length/mode_1": 342.046875, + "epoch": 0.056414922656960874, + "format_confidence": 0.5, + "grad_norm": 0.9866700336073141, + "grounded_proportion": 0.5, + "kl": 0.01806640625, + "learning_rate": 9.444949954504094e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.44921875, + "reward_std": 0.26063913106918335, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.44921875, + "step": 62 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.53125, + "TT_Counting/mode_1": 0.4375, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.6166666666666667, + "TT_Math/mode_1": 0.7, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 312.875, + "completion_length/mode_0": 329.6640625, + "completion_length/mode_1": 296.0859375, + "epoch": 0.05732484076433121, + "format_confidence": 0.5, + "grad_norm": 0.826234605264805, + "grounded_proportion": 0.5, + "kl": 0.0211181640625, + "learning_rate": 9.435850773430391e-07, + "loss": 0.0008, + "over_lengthy_sequences": 0.0, + "reward": 1.48828125, + "reward_std": 0.2982718050479889, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.4921875, + "step": 63 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.4642857142857143, + "TT_Counting/mode_1": 0.42857142857142855, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.5588235294117647, + "TT_Math/mode_1": 0.4852941176470588, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 320.4296875, + "completion_length/mode_0": 344.984375, + "completion_length/mode_1": 295.875, + "epoch": 0.05823475887170155, + "format_confidence": 0.5, + "grad_norm": 1.332396950969122, + "grounded_proportion": 0.5, + "kl": 0.017822265625, + "learning_rate": 9.426751592356688e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.4375, + "reward_std": 0.30236050486564636, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4375, + "step": 64 + }, + { + "TT_Chart/mode_0": 0.5833333333333334, + "TT_Chart/mode_1": 0.4166666666666667, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.7708333333333334, + "TT_Math/mode_1": 0.7708333333333334, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.35, + "TT_Science/mode_1": 0.4, + "completion_length": 265.51171875, + "completion_length/mode_0": 294.296875, + "completion_length/mode_1": 236.7265625, + "epoch": 0.059144676979071886, + "format_confidence": 0.5, + "grad_norm": 1.0706428268753283, + "grounded_proportion": 0.5, + "kl": 0.0172119140625, + "learning_rate": 9.417652411282983e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.53515625, + "reward_std": 0.2715778052806854, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.53515625, + "step": 65 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.35714285714285715, + "TT_Math/mode_0": 0.625, + "TT_Math/mode_1": 0.703125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.1875, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.35, + "TT_Science/mode_1": 0.4, + "completion_length": 337.359375, + "completion_length/mode_0": 346.75, + "completion_length/mode_1": 327.96875, + "epoch": 0.06005459508644222, + "format_confidence": 0.5, + "grad_norm": 0.9882812019724813, + "grounded_proportion": 0.5, + "kl": 0.0286865234375, + "learning_rate": 9.408553230209281e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.2574900984764099, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4296875, + "step": 66 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.16666666666666666, + "TT_Counting/mode_1": 0.16666666666666666, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.5227272727272727, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.5, + "completion_length": 360.41796875, + "completion_length/mode_0": 384.0, + "completion_length/mode_1": 336.8359375, + "epoch": 0.060964513193812554, + "format_confidence": 0.5, + "grad_norm": 1.081856623327895, + "grounded_proportion": 0.5, + "kl": 0.013671875, + "learning_rate": 9.399454049135578e-07, + "loss": 0.0005, + "over_lengthy_sequences": 0.0, + "reward": 1.3984375, + "reward_std": 0.3071898818016052, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3984375, + "step": 67 + }, + { + "TT_Chart/mode_0": 0.3333333333333333, + "TT_Chart/mode_1": 0.3333333333333333, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.36904761904761907, + "TT_Math/mode_1": 0.4523809523809524, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.25, + "completion_length": 382.390625, + "completion_length/mode_0": 399.09375, + "completion_length/mode_1": 365.6875, + "epoch": 0.06187443130118289, + "format_confidence": 0.5, + "grad_norm": 1.0129574632580445, + "grounded_proportion": 0.5, + "kl": 0.0257568359375, + "learning_rate": 9.390354868061873e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.00390625, + "reward": 1.36328125, + "reward_std": 0.33184993267059326, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3671875, + "step": 68 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5625, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.35714285714285715, + "TT_Math/mode_1": 0.42857142857142855, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 315.05078125, + "completion_length/mode_0": 341.9140625, + "completion_length/mode_1": 288.1875, + "epoch": 0.06278434940855324, + "format_confidence": 0.5, + "grad_norm": 0.6116970446341301, + "grounded_proportion": 0.5, + "kl": 0.031005859375, + "learning_rate": 9.381255686988171e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.31640625, + "reward_std": 0.25513356924057007, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.31640625, + "step": 69 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.5833333333333334, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.6111111111111112, + "TT_Math/mode_1": 0.6388888888888888, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 1.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.3, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 1.0, + "completion_length": 325.84375, + "completion_length/mode_0": 348.8671875, + "completion_length/mode_1": 302.8203125, + "epoch": 0.06369426751592357, + "format_confidence": 0.5, + "grad_norm": 1.393496150546713, + "grounded_proportion": 0.5, + "kl": 0.0267333984375, + "learning_rate": 9.372156505914467e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.49609375, + "reward_std": 0.3036562502384186, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.49609375, + "step": 70 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5277777777777778, + "TT_Math/mode_1": 0.5694444444444444, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 358.953125, + "completion_length/mode_0": 377.46875, + "completion_length/mode_1": 340.4375, + "epoch": 0.0646041856232939, + "format_confidence": 0.5, + "grad_norm": 0.8157106114860397, + "grounded_proportion": 0.5, + "kl": 0.01556396484375, + "learning_rate": 9.363057324840764e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.40625, + "reward_std": 0.2968239188194275, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40625, + "step": 71 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.3888888888888889, + "TT_Math/mode_1": 0.37962962962962965, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 524.28515625, + "completion_length/mode_0": 538.4765625, + "completion_length/mode_1": 510.09375, + "epoch": 0.06551410373066424, + "format_confidence": 0.5, + "grad_norm": 1.4368535247954564, + "grounded_proportion": 0.5, + "kl": 0.038330078125, + "learning_rate": 9.35395814376706e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0078125, + "reward": 1.328125, + "reward_std": 0.3050992488861084, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.3359375, + "step": 72 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.45, + "TT_Counting/mode_1": 0.6, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.6052631578947368, + "TT_Math/mode_1": 0.618421052631579, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 345.50390625, + "completion_length/mode_0": 374.7734375, + "completion_length/mode_1": 316.234375, + "epoch": 0.06642402183803457, + "format_confidence": 0.5, + "grad_norm": 1.4581833815927654, + "grounded_proportion": 0.5, + "kl": 0.027587890625, + "learning_rate": 9.344858962693357e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.00390625, + "reward": 1.47265625, + "reward_std": 0.3206025958061218, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.4765625, + "step": 73 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6666666666666666, + "TT_Math/mode_1": 0.5833333333333334, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5833333333333334, + "TT_Science/mode_1": 0.4166666666666667, + "completion_length": 336.33203125, + "completion_length/mode_0": 374.1171875, + "completion_length/mode_1": 298.546875, + "epoch": 0.06733393994540492, + "format_confidence": 0.5, + "grad_norm": 1.9303402353521169, + "grounded_proportion": 0.5, + "kl": 0.0269775390625, + "learning_rate": 9.335759781619655e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.51171875, + "reward_std": 0.40742409229278564, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.51171875, + "step": 74 + }, + { + "TT_Chart/mode_0": 0.3, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0625, + "TT_Grounding/mode_1": 0.5625, + "TT_Math/mode_0": 0.44642857142857145, + "TT_Math/mode_1": 0.5178571428571429, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 310.859375, + "completion_length/mode_0": 345.890625, + "completion_length/mode_1": 275.828125, + "epoch": 0.06824385805277525, + "format_confidence": 0.5, + "grad_norm": 1.094986906392167, + "grounded_proportion": 0.5, + "kl": 0.0250244140625, + "learning_rate": 9.32666060054595e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.32421875, + "reward_std": 0.2394905686378479, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.32421875, + "step": 75 + }, + { + "TT_Chart/mode_0": 0.35, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.5125, + "TT_Math/mode_1": 0.45, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 1.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 444.99609375, + "completion_length/mode_0": 487.234375, + "completion_length/mode_1": 402.7578125, + "epoch": 0.06915377616014559, + "format_confidence": 0.5, + "grad_norm": 0.9456640910256001, + "grounded_proportion": 0.5, + "kl": 0.02783203125, + "learning_rate": 9.317561419472247e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.32864031195640564, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 76 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.484375, + "TT_Math/mode_1": 0.46875, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.375, + "completion_length": 357.015625, + "completion_length/mode_0": 384.8125, + "completion_length/mode_1": 329.21875, + "epoch": 0.07006369426751592, + "format_confidence": 0.5, + "grad_norm": 1.126552968002394, + "grounded_proportion": 0.5, + "kl": 0.01446533203125, + "learning_rate": 9.308462238398544e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.3359375, + "reward_std": 0.20832324028015137, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3359375, + "step": 77 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.3125, + "TT_Detection/mode_1": 0.3125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.39473684210526316, + "TT_Math/mode_1": 0.39473684210526316, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.75, + "completion_length": 365.06640625, + "completion_length/mode_0": 388.1796875, + "completion_length/mode_1": 341.953125, + "epoch": 0.07097361237488627, + "format_confidence": 0.5, + "grad_norm": 1.8929118579408397, + "grounded_proportion": 0.5, + "kl": 0.021240234375, + "learning_rate": 9.299363057324841e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.00390625, + "reward": 1.36328125, + "reward_std": 0.32180553674697876, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.375, + "step": 78 + }, + { + "TT_Chart/mode_0": 0.4166666666666667, + "TT_Chart/mode_1": 0.4166666666666667, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.5441176470588235, + "TT_Math/mode_1": 0.4264705882352941, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.25, + "completion_length": 412.8359375, + "completion_length/mode_0": 454.6640625, + "completion_length/mode_1": 371.0078125, + "epoch": 0.0718835304822566, + "format_confidence": 0.5, + "grad_norm": 2.3611466751955206, + "grounded_proportion": 0.5, + "kl": 0.0196533203125, + "learning_rate": 9.290263876251136e-07, + "loss": 0.0008, + "over_lengthy_sequences": 0.00390625, + "reward": 1.35546875, + "reward_std": 0.2786721885204315, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.359375, + "step": 79 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.8333333333333334, + "TT_Math/mode_0": 0.703125, + "TT_Math/mode_1": 0.546875, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.3333333333333333, + "TT_Others/mode_1": 0.4166666666666667, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 324.98828125, + "completion_length/mode_0": 360.6484375, + "completion_length/mode_1": 289.328125, + "epoch": 0.07279344858962693, + "format_confidence": 0.5, + "grad_norm": 1.423595772914213, + "grounded_proportion": 0.5, + "kl": 0.028076171875, + "learning_rate": 9.281164695177434e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.4765625, + "reward_std": 0.32864275574684143, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4765625, + "step": 80 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.3333333333333333, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4583333333333333, + "TT_Math/mode_1": 0.4861111111111111, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 421.86328125, + "completion_length/mode_0": 444.5234375, + "completion_length/mode_1": 399.203125, + "epoch": 0.07370336669699727, + "format_confidence": 0.5, + "grad_norm": 0.9052028066723226, + "grounded_proportion": 0.5, + "kl": 0.0186767578125, + "learning_rate": 9.272065514103731e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.37890625, + "reward_std": 0.3306756913661957, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.390625, + "step": 81 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0625, + "TT_Detection/mode_1": 0.6875, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.55, + "TT_Math/mode_1": 0.5125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.5, + "completion_length": 383.95703125, + "completion_length/mode_0": 410.4375, + "completion_length/mode_1": 357.4765625, + "epoch": 0.0746132848043676, + "format_confidence": 0.5, + "grad_norm": 1.1442493170852235, + "grounded_proportion": 0.5, + "kl": 0.030029296875, + "learning_rate": 9.262966333030026e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.46875, + "reward_std": 0.3109995126724243, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.47265625, + "step": 82 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.125, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.4166666666666667, + "TT_Math/mode_0": 0.525, + "TT_Math/mode_1": 0.475, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.625, + "completion_length": 355.09765625, + "completion_length/mode_0": 388.8984375, + "completion_length/mode_1": 321.296875, + "epoch": 0.07552320291173795, + "format_confidence": 0.5, + "grad_norm": 1.1396443794919924, + "grounded_proportion": 0.5, + "kl": 0.01708984375, + "learning_rate": 9.253867151956324e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.3350968360900879, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4296875, + "step": 83 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.3, + "TT_Counting/mode_1": 0.3, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.4375, + "TT_Document/mode_0": 0.3333333333333333, + "TT_Document/mode_1": 0.3333333333333333, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.4807692307692308, + "TT_Math/mode_1": 0.5769230769230769, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 306.87890625, + "completion_length/mode_0": 345.4296875, + "completion_length/mode_1": 268.328125, + "epoch": 0.07643312101910828, + "format_confidence": 0.5, + "grad_norm": 0.8312318484762555, + "grounded_proportion": 0.5, + "kl": 0.0380859375, + "learning_rate": 9.24476797088262e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.36328125, + "reward_std": 0.2795896828174591, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3671875, + "step": 84 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.4270833333333333, + "TT_Math/mode_1": 0.4479166666666667, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 418.26953125, + "completion_length/mode_0": 462.9609375, + "completion_length/mode_1": 373.578125, + "epoch": 0.07734303912647862, + "format_confidence": 0.5, + "grad_norm": 0.7031346175738115, + "grounded_proportion": 0.5, + "kl": 0.0191650390625, + "learning_rate": 9.235668789808917e-07, + "loss": 0.0008, + "over_lengthy_sequences": 0.0, + "reward": 1.37890625, + "reward_std": 0.2682702839374542, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37890625, + "step": 85 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.3125, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.6710526315789473, + "TT_Math/mode_1": 0.631578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.75, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.25, + "completion_length": 314.35546875, + "completion_length/mode_0": 347.1015625, + "completion_length/mode_1": 281.609375, + "epoch": 0.07825295723384895, + "format_confidence": 0.5, + "grad_norm": 1.1106438112336248, + "grounded_proportion": 0.5, + "kl": 0.033447265625, + "learning_rate": 9.226569608735213e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.53125, + "reward_std": 0.3708537220954895, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.53125, + "step": 86 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.4659090909090909, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 459.609375, + "completion_length/mode_0": 470.390625, + "completion_length/mode_1": 448.828125, + "epoch": 0.0791628753412193, + "format_confidence": 0.5, + "grad_norm": 0.9111319582454244, + "grounded_proportion": 0.5, + "kl": 0.0283203125, + "learning_rate": 9.21747042766151e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.00390625, + "reward": 1.37890625, + "reward_std": 0.32089686393737793, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3828125, + "step": 87 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.16666666666666666, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.45588235294117646, + "TT_Math/mode_1": 0.35294117647058826, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3, + "TT_Science/mode_1": 0.5, + "completion_length": 288.15625, + "completion_length/mode_0": 321.078125, + "completion_length/mode_1": 255.234375, + "epoch": 0.08007279344858963, + "format_confidence": 0.5, + "grad_norm": 0.9337639335763862, + "grounded_proportion": 0.5, + "kl": 0.0252685546875, + "learning_rate": 9.208371246587808e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.33984375, + "reward_std": 0.32207000255584717, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.33984375, + "step": 88 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.16666666666666666, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.4444444444444444, + "TT_Math/mode_1": 0.4305555555555556, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.0, + "completion_length": 379.68359375, + "completion_length/mode_0": 409.75, + "completion_length/mode_1": 349.6171875, + "epoch": 0.08098271155595996, + "format_confidence": 0.5, + "grad_norm": 0.9170168681283755, + "grounded_proportion": 0.5, + "kl": 0.04052734375, + "learning_rate": 9.199272065514103e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.27734375, + "reward_std": 0.2290886491537094, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.27734375, + "step": 89 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.1875, + "TT_Grounding/mode_1": 0.3125, + "TT_Math/mode_0": 0.5625, + "TT_Math/mode_1": 0.525, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 1.0, + "completion_length": 332.9296875, + "completion_length/mode_0": 359.171875, + "completion_length/mode_1": 306.6875, + "epoch": 0.0818926296633303, + "format_confidence": 0.5, + "grad_norm": 1.2383174113880757, + "grounded_proportion": 0.5, + "kl": 0.0498046875, + "learning_rate": 9.1901728844404e-07, + "loss": 0.002, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.29393690824508667, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 90 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.3125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4625, + "TT_Math/mode_1": 0.4, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 1.0, + "completion_length": 342.94921875, + "completion_length/mode_0": 382.40625, + "completion_length/mode_1": 303.4921875, + "epoch": 0.08280254777070063, + "format_confidence": 0.5, + "grad_norm": 1.0193811299262545, + "grounded_proportion": 0.5, + "kl": 0.03173828125, + "learning_rate": 9.181073703366697e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.3356223702430725, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 91 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5657894736842105, + "TT_Math/mode_1": 0.4473684210526316, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.1875, + "completion_length": 314.7265625, + "completion_length/mode_0": 340.65625, + "completion_length/mode_1": 288.796875, + "epoch": 0.08371246587807098, + "format_confidence": 0.5, + "grad_norm": 0.9300317397982756, + "grounded_proportion": 0.5, + "kl": 0.02294921875, + "learning_rate": 9.171974522292994e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.3043053150177002, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 92 + }, + { + "TT_Chart/mode_0": 0.4375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.16666666666666666, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.359375, + "TT_Math/mode_1": 0.3125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 1.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.1875, + "completion_length": 352.9375, + "completion_length/mode_0": 383.890625, + "completion_length/mode_1": 321.984375, + "epoch": 0.08462238398544131, + "format_confidence": 0.5, + "grad_norm": 1.3154100577214958, + "grounded_proportion": 0.5, + "kl": 0.0419921875, + "learning_rate": 9.162875341219289e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.0, + "reward": 1.328125, + "reward_std": 0.29681897163391113, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.328125, + "step": 93 + }, + { + "TT_Chart/mode_0": 0.4375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.453125, + "TT_Math/mode_1": 0.328125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 338.6953125, + "completion_length/mode_0": 377.4609375, + "completion_length/mode_1": 299.9296875, + "epoch": 0.08553230209281165, + "format_confidence": 0.5, + "grad_norm": 0.8794839213028697, + "grounded_proportion": 0.5, + "kl": 0.0306396484375, + "learning_rate": 9.153776160145587e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.27734375, + "reward_std": 0.2864776849746704, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.28125, + "step": 94 + }, + { + "TT_Chart/mode_0": 0.4375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.675, + "TT_Math/mode_1": 0.6125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.4166666666666667, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 403.01171875, + "completion_length/mode_0": 441.59375, + "completion_length/mode_1": 364.4296875, + "epoch": 0.08644222020018198, + "format_confidence": 0.5, + "grad_norm": 0.7977211310971815, + "grounded_proportion": 0.5, + "kl": 0.0137939453125, + "learning_rate": 9.144676979071884e-07, + "loss": 0.0006, + "over_lengthy_sequences": 0.0, + "reward": 1.52734375, + "reward_std": 0.30707117915153503, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.52734375, + "step": 95 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.875, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.35294117647058826, + "TT_Math/mode_1": 0.3088235294117647, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.4166666666666667, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 380.80078125, + "completion_length/mode_0": 407.078125, + "completion_length/mode_1": 354.5234375, + "epoch": 0.08735213830755233, + "format_confidence": 0.5, + "grad_norm": 0.893912421847757, + "grounded_proportion": 0.5, + "kl": 0.039306640625, + "learning_rate": 9.135577797998179e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.3125, + "reward_std": 0.23303402960300446, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3125, + "step": 96 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.875, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6309523809523809, + "TT_Math/mode_1": 0.5476190476190477, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.5, + "completion_length": 349.80859375, + "completion_length/mode_0": 384.3671875, + "completion_length/mode_1": 315.25, + "epoch": 0.08826205641492266, + "format_confidence": 0.5, + "grad_norm": 1.488221637431153, + "grounded_proportion": 0.5, + "kl": 0.047607421875, + "learning_rate": 9.126478616924477e-07, + "loss": 0.0019, + "over_lengthy_sequences": 0.0, + "reward": 1.48046875, + "reward_std": 0.33312100172042847, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.48046875, + "step": 97 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.875, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.475, + "TT_Math/mode_1": 0.5125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 386.19140625, + "completion_length/mode_0": 423.7421875, + "completion_length/mode_1": 348.640625, + "epoch": 0.08917197452229299, + "format_confidence": 0.5, + "grad_norm": 0.7423205892030071, + "grounded_proportion": 0.5, + "kl": 0.0419921875, + "learning_rate": 9.117379435850773e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.00390625, + "reward": 1.4375, + "reward_std": 0.2985200881958008, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.44140625, + "step": 98 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.4166666666666667, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.4375, + "TT_Detection/mode_0": 0.375, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.08333333333333333, + "TT_Grounding/mode_1": 0.08333333333333333, + "TT_Math/mode_0": 0.578125, + "TT_Math/mode_1": 0.546875, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 307.75, + "completion_length/mode_0": 326.390625, + "completion_length/mode_1": 289.109375, + "epoch": 0.09008189262966333, + "format_confidence": 0.5, + "grad_norm": 1.089867159322649, + "grounded_proportion": 0.5, + "kl": 0.05029296875, + "learning_rate": 9.108280254777069e-07, + "loss": 0.002, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.2457924783229828, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 99 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0625, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.08333333333333333, + "TT_Grounding/mode_1": 0.08333333333333333, + "TT_Math/mode_0": 0.3333333333333333, + "TT_Math/mode_1": 0.2857142857142857, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.25, + "completion_length": 390.5234375, + "completion_length/mode_0": 416.84375, + "completion_length/mode_1": 364.203125, + "epoch": 0.09099181073703366, + "format_confidence": 0.5, + "grad_norm": 0.7432661418713463, + "grounded_proportion": 0.5, + "kl": 0.033447265625, + "learning_rate": 9.099181073703366e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.296875, + "reward_std": 0.3486403822898865, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.3046875, + "step": 100 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5138888888888888, + "TT_Math/mode_1": 0.4722222222222222, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.25, + "completion_length": 417.96875, + "completion_length/mode_0": 456.1015625, + "completion_length/mode_1": 379.8359375, + "epoch": 0.09190172884440401, + "format_confidence": 0.5, + "grad_norm": 0.8403418247569083, + "grounded_proportion": 0.5, + "kl": 0.017822265625, + "learning_rate": 9.090081892629663e-07, + "loss": 0.0007, + "over_lengthy_sequences": 0.0, + "reward": 1.37890625, + "reward_std": 0.31049102544784546, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37890625, + "step": 101 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.1875, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.4852941176470588, + "TT_Math/mode_1": 0.38235294117647056, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 1.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.1875, + "completion_length": 357.59765625, + "completion_length/mode_0": 374.7421875, + "completion_length/mode_1": 340.453125, + "epoch": 0.09281164695177434, + "format_confidence": 0.5, + "grad_norm": 1.0298049787611796, + "grounded_proportion": 0.5, + "kl": 0.04736328125, + "learning_rate": 9.08098271155596e-07, + "loss": 0.0019, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.30194875597953796, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3671875, + "step": 102 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.45, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.6333333333333333, + "TT_Math/mode_1": 0.55, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 1.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 289.25, + "completion_length/mode_0": 316.984375, + "completion_length/mode_1": 261.515625, + "epoch": 0.09372156505914468, + "format_confidence": 0.5, + "grad_norm": 1.322134373732465, + "grounded_proportion": 0.5, + "kl": 0.03857421875, + "learning_rate": 9.071883530482256e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.4375, + "reward_std": 0.3878220021724701, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4375, + "step": 103 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.42105263157894735, + "TT_Math/mode_1": 0.4605263157894737, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.0, + "completion_length": 394.15625, + "completion_length/mode_0": 421.609375, + "completion_length/mode_1": 366.703125, + "epoch": 0.09463148316651501, + "format_confidence": 0.5, + "grad_norm": 1.138479077746897, + "grounded_proportion": 0.5, + "kl": 0.02734375, + "learning_rate": 9.062784349408553e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.33984375, + "reward_std": 0.28182753920555115, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.33984375, + "step": 104 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.375, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.6470588235294118, + "TT_Math/mode_1": 0.6176470588235294, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.0, + "completion_length": 359.4453125, + "completion_length/mode_0": 369.296875, + "completion_length/mode_1": 349.59375, + "epoch": 0.09554140127388536, + "format_confidence": 0.5, + "grad_norm": 0.8292575747476529, + "grounded_proportion": 0.5, + "kl": 0.029052734375, + "learning_rate": 9.05368516833485e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.52734375, + "reward_std": 0.22620166838169098, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.53125, + "step": 105 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.08333333333333333, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.47368421052631576, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.4166666666666667, + "TT_Others/mode_1": 0.6666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.0, + "completion_length": 311.8515625, + "completion_length/mode_0": 330.546875, + "completion_length/mode_1": 293.15625, + "epoch": 0.09645131938125569, + "format_confidence": 0.5, + "grad_norm": 0.7435578662413549, + "grounded_proportion": 0.5, + "kl": 0.0233154296875, + "learning_rate": 9.044585987261146e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.453125, + "reward_std": 0.3033941984176636, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.453125, + "step": 106 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.625, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0625, + "TT_Math/mode_0": 0.39705882352941174, + "TT_Math/mode_1": 0.4264705882352941, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.4166666666666667, + "TT_Others/mode_1": 0.6666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 379.23046875, + "completion_length/mode_0": 410.96875, + "completion_length/mode_1": 347.4921875, + "epoch": 0.09736123748862602, + "format_confidence": 0.5, + "grad_norm": 1.95451747528783, + "grounded_proportion": 0.5, + "kl": 0.031494140625, + "learning_rate": 9.035486806187442e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.2845958471298218, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3671875, + "step": 107 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.35, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5394736842105263, + "TT_Math/mode_1": 0.5394736842105263, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 430.86328125, + "completion_length/mode_0": 444.203125, + "completion_length/mode_1": 417.5234375, + "epoch": 0.09827115559599636, + "format_confidence": 0.5, + "grad_norm": 0.6617312507830653, + "grounded_proportion": 0.5, + "kl": 0.0247802734375, + "learning_rate": 9.02638762511374e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.01171875, + "reward": 1.40234375, + "reward_std": 0.2459551990032196, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.4140625, + "step": 108 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 1.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.55, + "TT_Math/mode_1": 0.43333333333333335, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.4166666666666667, + "completion_length": 364.859375, + "completion_length/mode_0": 396.6875, + "completion_length/mode_1": 333.03125, + "epoch": 0.09918107370336669, + "format_confidence": 0.5, + "grad_norm": 1.1162695339468396, + "grounded_proportion": 0.5, + "kl": 0.0306396484375, + "learning_rate": 9.017288444040037e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.306543231010437, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.38671875, + "step": 109 + }, + { + "TT_Chart/mode_0": 0.6, + "TT_Chart/mode_1": 0.55, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.45, + "TT_Detection/mode_0": 0.3333333333333333, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.4117647058823529, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 350.5859375, + "completion_length/mode_0": 396.2421875, + "completion_length/mode_1": 304.9296875, + "epoch": 0.10009099181073704, + "format_confidence": 0.5, + "grad_norm": 1.4082884314141118, + "grounded_proportion": 0.5, + "kl": 0.0264892578125, + "learning_rate": 9.008189262966332e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.00390625, + "reward": 1.421875, + "reward_std": 0.372009813785553, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.42578125, + "step": 110 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.3333333333333333, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.5263157894736842, + "TT_Math/mode_1": 0.5263157894736842, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 272.01953125, + "completion_length/mode_0": 287.265625, + "completion_length/mode_1": 256.7734375, + "epoch": 0.10100090991810737, + "format_confidence": 0.5, + "grad_norm": 14.658431508796246, + "grounded_proportion": 0.5, + "kl": 0.034423828125, + "learning_rate": 8.99909008189263e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.46875, + "reward_std": 0.29970598220825195, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.46875, + "step": 111 + }, + { + "TT_Chart/mode_0": 0.16666666666666666, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.6052631578947368, + "TT_Math/mode_1": 0.5526315789473685, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.5, + "completion_length": 357.01171875, + "completion_length/mode_0": 377.296875, + "completion_length/mode_1": 336.7265625, + "epoch": 0.10191082802547771, + "format_confidence": 0.5, + "grad_norm": 0.8195338040008043, + "grounded_proportion": 0.5, + "kl": 0.030517578125, + "learning_rate": 8.989990900818926e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.305894136428833, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 112 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.625, + "TT_Counting/mode_0": 0.5833333333333334, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.4852941176470588, + "TT_Math/mode_1": 0.4852941176470588, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.4375, + "completion_length": 337.96875, + "completion_length/mode_0": 374.4453125, + "completion_length/mode_1": 301.4921875, + "epoch": 0.10282074613284804, + "format_confidence": 0.5, + "grad_norm": 0.8731230125231162, + "grounded_proportion": 0.5, + "kl": 0.026123046875, + "learning_rate": 8.980891719745222e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.453125, + "reward_std": 0.246971994638443, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.453125, + "step": 113 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.3125, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.9166666666666666, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6071428571428571, + "TT_Math/mode_1": 0.5357142857142857, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.0625, + "TT_Puzzle/mode_0": 1.0, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.0, + "completion_length": 338.16015625, + "completion_length/mode_0": 361.3359375, + "completion_length/mode_1": 314.984375, + "epoch": 0.10373066424021839, + "format_confidence": 0.5, + "grad_norm": 0.7811590590595154, + "grounded_proportion": 0.5, + "kl": 0.027587890625, + "learning_rate": 8.971792538671519e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.23079612851142883, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 114 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.3382352941176471, + "TT_Math/mode_1": 0.39705882352941174, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.125, + "completion_length": 328.72265625, + "completion_length/mode_0": 343.1015625, + "completion_length/mode_1": 314.34375, + "epoch": 0.10464058234758872, + "format_confidence": 0.5, + "grad_norm": 0.8326183125105682, + "grounded_proportion": 0.5, + "kl": 0.037353515625, + "learning_rate": 8.962693357597816e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.3203125, + "reward_std": 0.2396092265844345, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3203125, + "step": 115 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.8333333333333334, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.5384615384615384, + "TT_Math/mode_1": 0.5769230769230769, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.16666666666666666, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.40625, + "TT_Science/mode_1": 0.21875, + "completion_length": 325.96484375, + "completion_length/mode_0": 346.40625, + "completion_length/mode_1": 305.5234375, + "epoch": 0.10555050045495905, + "format_confidence": 0.5, + "grad_norm": 1.1036768563812662, + "grounded_proportion": 0.5, + "kl": 0.0242919921875, + "learning_rate": 8.953594176524113e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.3274608254432678, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4296875, + "step": 116 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.5833333333333334, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.16666666666666666, + "TT_Document/mode_1": 0.08333333333333333, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5166666666666667, + "TT_Math/mode_1": 0.5833333333333334, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.5833333333333334, + "TT_Science/mode_1": 0.5833333333333334, + "completion_length": 330.74609375, + "completion_length/mode_0": 349.3984375, + "completion_length/mode_1": 312.09375, + "epoch": 0.10646041856232939, + "format_confidence": 0.5, + "grad_norm": 0.760986746071062, + "grounded_proportion": 0.5, + "kl": 0.02978515625, + "learning_rate": 8.944494995450409e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.1817479431629181, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 117 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.42857142857142855, + "TT_Counting/mode_1": 0.6071428571428571, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.45588235294117646, + "TT_Math/mode_1": 0.47058823529411764, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.5833333333333334, + "TT_Science/mode_1": 0.5833333333333334, + "completion_length": 403.1484375, + "completion_length/mode_0": 429.703125, + "completion_length/mode_1": 376.59375, + "epoch": 0.10737033666969972, + "format_confidence": 0.5, + "grad_norm": 0.8553481604117894, + "grounded_proportion": 0.5, + "kl": 0.0291748046875, + "learning_rate": 8.935395814376706e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.308400422334671, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 118 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.5394736842105263, + "TT_Math/mode_1": 0.39473684210526316, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.25, + "completion_length": 328.36328125, + "completion_length/mode_0": 361.7578125, + "completion_length/mode_1": 294.96875, + "epoch": 0.10828025477707007, + "format_confidence": 0.5, + "grad_norm": 1.3318825940512236, + "grounded_proportion": 0.5, + "kl": 0.033203125, + "learning_rate": 8.926296633303002e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.3984375, + "reward_std": 0.3040344715118408, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3984375, + "step": 119 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.4625, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.4166666666666667, + "TT_Science/mode_1": 0.25, + "completion_length": 424.55859375, + "completion_length/mode_0": 449.5390625, + "completion_length/mode_1": 399.578125, + "epoch": 0.1091901728844404, + "format_confidence": 0.5, + "grad_norm": 1.1258840181711312, + "grounded_proportion": 0.5, + "kl": 0.0235595703125, + "learning_rate": 8.917197452229299e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.375, + "reward_std": 0.32681170105934143, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.375, + "step": 120 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 1.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.75, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.4852941176470588, + "TT_Math/mode_1": 0.5588235294117647, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 1.0, + "TT_Science/mode_0": 0.4, + "TT_Science/mode_1": 0.4, + "completion_length": 337.171875, + "completion_length/mode_0": 355.90625, + "completion_length/mode_1": 318.4375, + "epoch": 0.11010009099181074, + "format_confidence": 0.5, + "grad_norm": 1.8863415714384077, + "grounded_proportion": 0.5, + "kl": 0.029541015625, + "learning_rate": 8.908098271155595e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.4375, + "reward_std": 0.337970107793808, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4375, + "step": 121 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.1875, + "TT_Math/mode_0": 0.6, + "TT_Math/mode_1": 0.5333333333333333, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 318.25, + "completion_length/mode_0": 324.25, + "completion_length/mode_1": 312.25, + "epoch": 0.11101000909918107, + "format_confidence": 0.5, + "grad_norm": 0.7478850656639994, + "grounded_proportion": 0.5, + "kl": 0.0390625, + "learning_rate": 8.898999090081893e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.00390625, + "reward": 1.36328125, + "reward_std": 0.24553291499614716, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3671875, + "step": 122 + }, + { + "TT_Chart/mode_0": 0.3, + "TT_Chart/mode_1": 0.15, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.16666666666666666, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.39285714285714285, + "TT_Math/mode_1": 0.3392857142857143, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.25, + "completion_length": 299.01171875, + "completion_length/mode_0": 332.2109375, + "completion_length/mode_1": 265.8125, + "epoch": 0.11191992720655142, + "format_confidence": 0.5, + "grad_norm": 1.0087357565512716, + "grounded_proportion": 0.5, + "kl": 0.0294189453125, + "learning_rate": 8.889899909008188e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.2734375, + "reward_std": 0.3007756471633911, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.27734375, + "step": 123 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.3333333333333333, + "TT_Math/mode_0": 0.4027777777777778, + "TT_Math/mode_1": 0.4583333333333333, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 0.375, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.4375, + "TT_Science/mode_1": 0.5, + "completion_length": 372.5859375, + "completion_length/mode_0": 401.2109375, + "completion_length/mode_1": 343.9609375, + "epoch": 0.11282984531392175, + "format_confidence": 0.5, + "grad_norm": 0.8137821391046608, + "grounded_proportion": 0.5, + "kl": 0.041015625, + "learning_rate": 8.880800727934485e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.2759014368057251, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.39453125, + "step": 124 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.5166666666666667, + "TT_Math/mode_1": 0.55, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.125, + "completion_length": 355.76171875, + "completion_length/mode_0": 365.4296875, + "completion_length/mode_1": 346.09375, + "epoch": 0.11373976342129208, + "format_confidence": 0.5, + "grad_norm": 0.7654413326806154, + "grounded_proportion": 0.5, + "kl": 0.036865234375, + "learning_rate": 8.871701546860783e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0078125, + "reward": 1.33984375, + "reward_std": 0.26253271102905273, + "rewards/format_reward": 0.9921875, + "rewards/general_task_reward": 0.34765625, + "step": 125 + }, + { + "TT_Chart/mode_0": 0.3333333333333333, + "TT_Chart/mode_1": 0.3333333333333333, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.525, + "TT_Math/mode_1": 0.45, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.75, + "completion_length": 371.875, + "completion_length/mode_0": 405.6640625, + "completion_length/mode_1": 338.0859375, + "epoch": 0.11464968152866242, + "format_confidence": 0.5, + "grad_norm": 0.6049711014702803, + "grounded_proportion": 0.5, + "kl": 0.036376953125, + "learning_rate": 8.862602365787079e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.00390625, + "reward": 1.4140625, + "reward_std": 0.28288590908050537, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.41796875, + "step": 126 + }, + { + "TT_Chart/mode_0": 0.4, + "TT_Chart/mode_1": 0.4, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.75, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4444444444444444, + "TT_Math/mode_1": 0.5277777777777778, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 325.46484375, + "completion_length/mode_0": 355.6796875, + "completion_length/mode_1": 295.25, + "epoch": 0.11555959963603275, + "format_confidence": 0.5, + "grad_norm": 1.8410793961989773, + "grounded_proportion": 0.5, + "kl": 0.033203125, + "learning_rate": 8.853503184713375e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.39453125, + "reward_std": 0.2690715193748474, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.39453125, + "step": 127 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.75, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.5625, + "TT_Math/mode_1": 0.515625, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5416666666666666, + "TT_Science/mode_1": 0.5, + "completion_length": 320.73046875, + "completion_length/mode_0": 337.2265625, + "completion_length/mode_1": 304.234375, + "epoch": 0.1164695177434031, + "format_confidence": 0.5, + "grad_norm": 0.6389432250465099, + "grounded_proportion": 0.5, + "kl": 0.0242919921875, + "learning_rate": 8.844404003639672e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.23922216892242432, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 128 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.4375, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6666666666666666, + "TT_Math/mode_1": 0.5166666666666667, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.3333333333333333, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.75, + "completion_length": 406.078125, + "completion_length/mode_0": 416.359375, + "completion_length/mode_1": 395.796875, + "epoch": 0.11737943585077343, + "format_confidence": 0.5, + "grad_norm": 0.5631128640852374, + "grounded_proportion": 0.5, + "kl": 0.038818359375, + "learning_rate": 8.835304822565969e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.48828125, + "reward_std": 0.26944732666015625, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.48828125, + "step": 129 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.4375, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.6, + "TT_Math/mode_1": 0.6, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.3333333333333333, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 353.71484375, + "completion_length/mode_0": 367.8984375, + "completion_length/mode_1": 339.53125, + "epoch": 0.11828935395814377, + "format_confidence": 0.5, + "grad_norm": 1.1261868640139805, + "grounded_proportion": 0.5, + "kl": 0.032470703125, + "learning_rate": 8.826205641492264e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.4453125, + "reward_std": 0.2877512276172638, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4453125, + "step": 130 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.375, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.46875, + "TT_Math/mode_1": 0.40625, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 1.0, + "completion_length": 301.109375, + "completion_length/mode_0": 332.0390625, + "completion_length/mode_1": 270.1796875, + "epoch": 0.1191992720655141, + "format_confidence": 0.5, + "grad_norm": 1.3174322812929569, + "grounded_proportion": 0.5, + "kl": 0.05322265625, + "learning_rate": 8.817106460418562e-07, + "loss": 0.0021, + "over_lengthy_sequences": 0.0, + "reward": 1.3671875, + "reward_std": 0.23277445137500763, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3671875, + "step": 131 + }, + { + "TT_Chart/mode_0": 0.39285714285714285, + "TT_Chart/mode_1": 0.35714285714285715, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.375, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5555555555555556, + "TT_Math/mode_1": 0.5277777777777778, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.75, + "completion_length": 325.77734375, + "completion_length/mode_0": 339.9296875, + "completion_length/mode_1": 311.625, + "epoch": 0.12010919017288443, + "format_confidence": 0.5, + "grad_norm": 0.7069147924688921, + "grounded_proportion": 0.5, + "kl": 0.022705078125, + "learning_rate": 8.808007279344859e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.453125, + "reward_std": 0.2563130855560303, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.453125, + "step": 132 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.16666666666666666, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.75, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.484375, + "TT_Math/mode_1": 0.328125, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5833333333333334, + "completion_length": 326.15234375, + "completion_length/mode_0": 331.0, + "completion_length/mode_1": 321.3046875, + "epoch": 0.12101910828025478, + "format_confidence": 0.5, + "grad_norm": 1.4281295890879266, + "grounded_proportion": 0.5, + "kl": 0.04443359375, + "learning_rate": 8.798908098271155e-07, + "loss": 0.0018, + "over_lengthy_sequences": 0.0, + "reward": 1.40234375, + "reward_std": 0.2764293849468231, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40234375, + "step": 133 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 0.35, + "TT_Counting/mode_1": 0.2, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.5535714285714286, + "TT_Math/mode_1": 0.5714285714285714, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.25, + "completion_length": 311.26171875, + "completion_length/mode_0": 341.046875, + "completion_length/mode_1": 281.4765625, + "epoch": 0.12192902638762511, + "format_confidence": 0.5, + "grad_norm": 2.805065330097895, + "grounded_proportion": 0.5, + "kl": 0.0291748046875, + "learning_rate": 8.789808917197452e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.37109375, + "reward_std": 0.284593403339386, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37109375, + "step": 134 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.6666666666666666, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.5833333333333334, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 375.53125, + "completion_length/mode_0": 389.0234375, + "completion_length/mode_1": 362.0390625, + "epoch": 0.12283894449499545, + "format_confidence": 0.5, + "grad_norm": 0.7313698173703068, + "grounded_proportion": 0.5, + "kl": 0.0250244140625, + "learning_rate": 8.780709736123748e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.2764318287372589, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4296875, + "step": 135 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.55, + "TT_Counting/mode_1": 0.35, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.4, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 378.30078125, + "completion_length/mode_0": 388.0859375, + "completion_length/mode_1": 368.515625, + "epoch": 0.12374886260236578, + "format_confidence": 0.5, + "grad_norm": 1.3879851931562597, + "grounded_proportion": 0.5, + "kl": 0.02783203125, + "learning_rate": 8.771610555050046e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.3515625, + "reward_std": 0.29143065214157104, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3515625, + "step": 136 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.08333333333333333, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.5625, + "TT_Math/mode_1": 0.6041666666666666, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 366.40625, + "completion_length/mode_0": 374.453125, + "completion_length/mode_1": 358.359375, + "epoch": 0.12465878070973613, + "format_confidence": 0.5, + "grad_norm": 0.7633680054263763, + "grounded_proportion": 0.5, + "kl": 0.02880859375, + "learning_rate": 8.762511373976341e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.47265625, + "reward_std": 0.3259276747703552, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.484375, + "step": 137 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5238095238095238, + "TT_Math/mode_1": 0.4880952380952381, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.5, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.0, + "completion_length": 412.8359375, + "completion_length/mode_0": 415.609375, + "completion_length/mode_1": 410.0625, + "epoch": 0.12556869881710647, + "format_confidence": 0.5, + "grad_norm": 5.3443264326908135, + "grounded_proportion": 0.5, + "kl": 0.026611328125, + "learning_rate": 8.753412192902638e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.3915102481842041, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 138 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.42045454545454547, + "TT_Math/mode_1": 0.3181818181818182, + "TT_OCR/mode_0": 0.6666666666666666, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.0, + "completion_length": 423.390625, + "completion_length/mode_0": 448.96875, + "completion_length/mode_1": 397.8125, + "epoch": 0.1264786169244768, + "format_confidence": 0.5, + "grad_norm": 6.032568642838766, + "grounded_proportion": 0.5, + "kl": 0.033203125, + "learning_rate": 8.744313011828936e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.36328125, + "reward_std": 0.308400422334671, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.36328125, + "step": 139 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.4, + "TT_Document/mode_1": 0.35, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6875, + "TT_Math/mode_1": 0.703125, + "TT_OCR/mode_0": 0.6666666666666666, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.041666666666666664, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.0, + "completion_length": 343.56640625, + "completion_length/mode_0": 358.5234375, + "completion_length/mode_1": 328.609375, + "epoch": 0.12738853503184713, + "format_confidence": 0.5, + "grad_norm": 0.6882157506953025, + "grounded_proportion": 0.5, + "kl": 0.037353515625, + "learning_rate": 8.735213830755232e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.4375, + "reward_std": 0.19792133569717407, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4375, + "step": 140 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5476190476190477, + "TT_Math/mode_1": 0.5476190476190477, + "TT_OCR/mode_0": 0.6666666666666666, + "TT_OCR/mode_1": 0.3333333333333333, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 1.0, + "completion_length": 348.94140625, + "completion_length/mode_0": 355.53125, + "completion_length/mode_1": 342.3515625, + "epoch": 0.12829845313921748, + "format_confidence": 0.5, + "grad_norm": 0.7339055309204625, + "grounded_proportion": 0.5, + "kl": 0.044189453125, + "learning_rate": 8.726114649681528e-07, + "loss": 0.0018, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.2690690755844116, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 141 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.375, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4852941176470588, + "TT_Math/mode_1": 0.47058823529411764, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.16666666666666666, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.2, + "TT_Science/mode_1": 0.25, + "completion_length": 386.5390625, + "completion_length/mode_0": 398.9375, + "completion_length/mode_1": 374.140625, + "epoch": 0.1292083712465878, + "format_confidence": 0.5, + "grad_norm": 0.5546463094726118, + "grounded_proportion": 0.5, + "kl": 0.0262451171875, + "learning_rate": 8.717015468607825e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.33203125, + "reward_std": 0.27130940556526184, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.33203125, + "step": 142 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.35, + "TT_Counting/mode_0": 0.4, + "TT_Counting/mode_1": 0.2, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.375, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6041666666666666, + "TT_Math/mode_1": 0.5625, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.875, + "TT_Science/mode_1": 1.0, + "completion_length": 287.44921875, + "completion_length/mode_0": 305.828125, + "completion_length/mode_1": 269.0703125, + "epoch": 0.13011828935395814, + "format_confidence": 0.5, + "grad_norm": 0.889656894734258, + "grounded_proportion": 0.5, + "kl": 0.031982421875, + "learning_rate": 8.707916287534122e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.4296875, + "reward_std": 0.30077171325683594, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4296875, + "step": 143 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.2916666666666667, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.375, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.45454545454545453, + "TT_Math/mode_1": 0.45454545454545453, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.041666666666666664, + "TT_Others/mode_1": 0.2916666666666667, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.3333333333333333, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 300.1171875, + "completion_length/mode_0": 312.078125, + "completion_length/mode_1": 288.15625, + "epoch": 0.13102820746132848, + "format_confidence": 0.5, + "grad_norm": 0.8824978684287549, + "grounded_proportion": 0.5, + "kl": 0.03515625, + "learning_rate": 8.698817106460417e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.31640625, + "reward_std": 0.30931398272514343, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.31640625, + "step": 144 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.125, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.375, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.5735294117647058, + "TT_Math/mode_1": 0.5147058823529411, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.65, + "TT_Science/mode_1": 0.5, + "completion_length": 360.56640625, + "completion_length/mode_0": 382.359375, + "completion_length/mode_1": 338.7734375, + "epoch": 0.13193812556869883, + "format_confidence": 0.5, + "grad_norm": 0.7721949198133805, + "grounded_proportion": 0.5, + "kl": 0.0274658203125, + "learning_rate": 8.689717925386715e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.46875, + "reward_std": 0.33547264337539673, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.46875, + "step": 145 + }, + { + "TT_Chart/mode_0": 0.875, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4605263157894737, + "TT_Math/mode_1": 0.4473684210526316, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 344.33203125, + "completion_length/mode_0": 366.6171875, + "completion_length/mode_1": 322.046875, + "epoch": 0.13284804367606914, + "format_confidence": 0.5, + "grad_norm": 0.9909534863047289, + "grounded_proportion": 0.5, + "kl": 0.0238037109375, + "learning_rate": 8.680618744313012e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.324045866727829, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 146 + }, + { + "TT_Chart/mode_0": 0.875, + "TT_Chart/mode_1": 0.75, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.625, + "TT_Math/mode_0": 0.3472222222222222, + "TT_Math/mode_1": 0.375, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.3125, + "completion_length": 347.421875, + "completion_length/mode_0": 357.6640625, + "completion_length/mode_1": 337.1796875, + "epoch": 0.1337579617834395, + "format_confidence": 0.5, + "grad_norm": 0.720949141572624, + "grounded_proportion": 0.5, + "kl": 0.034423828125, + "learning_rate": 8.671519563239307e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.328125, + "reward_std": 0.2761722803115845, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.328125, + "step": 147 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 1.0, + "TT_Detection/mode_0": 0.3333333333333333, + "TT_Detection/mode_1": 0.5833333333333334, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.6666666666666666, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5681818181818182, + "TT_Math/mode_1": 0.5568181818181818, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.3125, + "completion_length": 340.26171875, + "completion_length/mode_0": 362.578125, + "completion_length/mode_1": 317.9453125, + "epoch": 0.13466787989080983, + "format_confidence": 0.5, + "grad_norm": 0.7763799064119764, + "grounded_proportion": 0.5, + "kl": 0.0400390625, + "learning_rate": 8.662420382165605e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.53515625, + "reward_std": 0.288013219833374, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.53515625, + "step": 148 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.6666666666666666, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.4583333333333333, + "TT_Math/mode_1": 0.4583333333333333, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 456.44140625, + "completion_length/mode_0": 481.53125, + "completion_length/mode_1": 431.3515625, + "epoch": 0.13557779799818018, + "format_confidence": 0.5, + "grad_norm": 0.6076513635609712, + "grounded_proportion": 0.5, + "kl": 0.03271484375, + "learning_rate": 8.653321201091901e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.40625, + "reward_std": 0.25460314750671387, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.40625, + "step": 149 + }, + { + "TT_Chart/mode_0": 0.35, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.3125, + "TT_Detection/mode_1": 0.3125, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.48333333333333334, + "TT_Math/mode_1": 0.5333333333333333, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.5, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 337.10546875, + "completion_length/mode_0": 345.125, + "completion_length/mode_1": 329.0859375, + "epoch": 0.1364877161055505, + "format_confidence": 0.5, + "grad_norm": 0.7513140959836652, + "grounded_proportion": 0.5, + "kl": 0.048583984375, + "learning_rate": 8.644222020018199e-07, + "loss": 0.0019, + "over_lengthy_sequences": 0.0, + "reward": 1.390625, + "reward_std": 0.2961748242378235, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.390625, + "step": 150 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5972222222222222, + "TT_Math/mode_1": 0.6388888888888888, + "TT_OCR/mode_0": 0.4375, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 315.36328125, + "completion_length/mode_0": 329.984375, + "completion_length/mode_1": 300.7421875, + "epoch": 0.13739763421292084, + "format_confidence": 0.5, + "grad_norm": 1.0275263126976402, + "grounded_proportion": 0.5, + "kl": 0.037841796875, + "learning_rate": 8.635122838944494e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.3984375, + "reward_std": 0.24172601103782654, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3984375, + "step": 151 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.5131578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.625, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 313.54296875, + "completion_length/mode_0": 326.3203125, + "completion_length/mode_1": 300.765625, + "epoch": 0.13830755232029118, + "format_confidence": 0.5, + "grad_norm": 0.9977927113854368, + "grounded_proportion": 0.5, + "kl": 0.0299072265625, + "learning_rate": 8.626023657870791e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.28182509541511536, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 152 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.5833333333333334, + "TT_Detection/mode_0": 0.16666666666666666, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.125, + "TT_Math/mode_0": 0.5833333333333334, + "TT_Math/mode_1": 0.5555555555555556, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.875, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.5, + "completion_length": 390.1796875, + "completion_length/mode_0": 406.2421875, + "completion_length/mode_1": 374.1171875, + "epoch": 0.1392174704276615, + "format_confidence": 0.5, + "grad_norm": 0.7680531984092493, + "grounded_proportion": 0.5, + "kl": 0.040771484375, + "learning_rate": 8.616924476797089e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.00390625, + "reward": 1.47265625, + "reward_std": 0.30024129152297974, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.4765625, + "step": 153 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 1.0, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6666666666666666, + "TT_Math/mode_1": 0.5694444444444444, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 256.26171875, + "completion_length/mode_0": 282.7109375, + "completion_length/mode_1": 229.8125, + "epoch": 0.14012738853503184, + "format_confidence": 0.5, + "grad_norm": 0.8865413491194238, + "grounded_proportion": 0.5, + "kl": 0.04150390625, + "learning_rate": 8.607825295723384e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.0, + "reward": 1.43359375, + "reward_std": 0.2194880098104477, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.43359375, + "step": 154 + }, + { + "TT_Chart/mode_0": 0.5416666666666666, + "TT_Chart/mode_1": 0.5416666666666666, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.625, + "TT_Math/mode_1": 0.575, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.375, + "completion_length": 367.86328125, + "completion_length/mode_0": 385.09375, + "completion_length/mode_1": 350.6328125, + "epoch": 0.1410373066424022, + "format_confidence": 0.5, + "grad_norm": 0.9428079277390882, + "grounded_proportion": 0.5, + "kl": 0.0299072265625, + "learning_rate": 8.598726114649681e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.49609375, + "reward_std": 0.32734215259552, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.5, + "step": 155 + }, + { + "TT_Chart/mode_0": 0.4, + "TT_Chart/mode_1": 0.35, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.6875, + "TT_Math/mode_0": 0.515625, + "TT_Math/mode_1": 0.484375, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.875, + "TT_Science/mode_1": 0.625, + "completion_length": 298.578125, + "completion_length/mode_0": 303.390625, + "completion_length/mode_1": 293.765625, + "epoch": 0.14194722474977253, + "format_confidence": 0.5, + "grad_norm": 2.2779976937162885, + "grounded_proportion": 0.5, + "kl": 0.06103515625, + "learning_rate": 8.589626933575978e-07, + "loss": 0.0024, + "over_lengthy_sequences": 0.0, + "reward": 1.453125, + "reward_std": 0.2784101665019989, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.453125, + "step": 156 + }, + { + "TT_Chart/mode_0": 0.125, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.625, + "TT_Document/mode_1": 0.625, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.6875, + "TT_Math/mode_0": 0.6578947368421053, + "TT_Math/mode_1": 0.631578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.4166666666666667, + "TT_Science/mode_1": 0.75, + "completion_length": 419.5, + "completion_length/mode_0": 432.984375, + "completion_length/mode_1": 406.015625, + "epoch": 0.14285714285714285, + "format_confidence": 0.5, + "grad_norm": 0.8454361433490853, + "grounded_proportion": 0.5, + "kl": 0.03466796875, + "learning_rate": 8.580527752502275e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.5234375, + "reward_std": 0.2661820948123932, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.5234375, + "step": 157 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 1.0, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.44047619047619047, + "TT_Math/mode_1": 0.4523809523809524, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 350.96484375, + "completion_length/mode_0": 365.6484375, + "completion_length/mode_1": 336.28125, + "epoch": 0.1437670609645132, + "format_confidence": 0.5, + "grad_norm": 0.41622087929958396, + "grounded_proportion": 0.5, + "kl": 0.03369140625, + "learning_rate": 8.57142857142857e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.32421875, + "reward_std": 0.14992907643318176, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.32421875, + "step": 158 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.1875, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.3, + "TT_Math/mode_0": 0.4, + "TT_Math/mode_1": 0.3333333333333333, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 299.73828125, + "completion_length/mode_0": 326.0546875, + "completion_length/mode_1": 273.421875, + "epoch": 0.14467697907188354, + "format_confidence": 0.5, + "grad_norm": 1.0658096903762169, + "grounded_proportion": 0.5, + "kl": 0.04296875, + "learning_rate": 8.562329390354868e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.0, + "reward": 1.28515625, + "reward_std": 0.28972315788269043, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.28515625, + "step": 159 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.3333333333333333, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 1.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.4868421052631579, + "TT_Math/mode_1": 0.5131578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.16666666666666666, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.625, + "TT_Science/mode_1": 0.4375, + "completion_length": 399.140625, + "completion_length/mode_0": 402.296875, + "completion_length/mode_1": 395.984375, + "epoch": 0.14558689717925385, + "format_confidence": 0.5, + "grad_norm": 1.3202914229642293, + "grounded_proportion": 0.5, + "kl": 0.0294189453125, + "learning_rate": 8.553230209281165e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.47265625, + "reward_std": 0.2205488383769989, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.47265625, + "step": 160 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.16666666666666666, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.625, + "TT_Math/mode_1": 0.5892857142857143, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.16666666666666666, + "TT_Others/mode_1": 0.4166666666666667, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 324.12109375, + "completion_length/mode_0": 342.9296875, + "completion_length/mode_1": 305.3125, + "epoch": 0.1464968152866242, + "format_confidence": 0.5, + "grad_norm": 0.9445520944835173, + "grounded_proportion": 0.5, + "kl": 0.040771484375, + "learning_rate": 8.54413102820746e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.00390625, + "reward": 1.390625, + "reward_std": 0.29116618633270264, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.39453125, + "step": 161 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.65, + "TT_Counting/mode_1": 0.65, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.36666666666666664, + "TT_Math/mode_1": 0.43333333333333335, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.1875, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 336.70703125, + "completion_length/mode_0": 345.09375, + "completion_length/mode_1": 328.3203125, + "epoch": 0.14740673339399454, + "format_confidence": 0.5, + "grad_norm": 1.053194115355607, + "grounded_proportion": 0.5, + "kl": 0.043701171875, + "learning_rate": 8.535031847133758e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.0, + "reward": 1.3984375, + "reward_std": 0.18345540761947632, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.40234375, + "step": 162 + }, + { + "TT_Chart/mode_0": 0.16666666666666666, + "TT_Chart/mode_1": 0.3333333333333333, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5714285714285714, + "TT_Math/mode_1": 0.5833333333333334, + "TT_OCR/mode_0": 0.125, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.5833333333333334, + "TT_Others/mode_1": 0.4166666666666667, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 394.390625, + "completion_length/mode_0": 414.21875, + "completion_length/mode_1": 374.5625, + "epoch": 0.1483166515013649, + "format_confidence": 0.5, + "grad_norm": 0.949326413337948, + "grounded_proportion": 0.5, + "kl": 0.03515625, + "learning_rate": 8.525932666060054e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.4765625, + "reward_std": 0.2801200747489929, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4765625, + "step": 163 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.4166666666666667, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.375, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5375, + "TT_Math/mode_1": 0.575, + "TT_OCR/mode_0": 0.25, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.08333333333333333, + "completion_length": 326.171875, + "completion_length/mode_0": 345.1796875, + "completion_length/mode_1": 307.1640625, + "epoch": 0.1492265696087352, + "format_confidence": 0.5, + "grad_norm": 1.0393763681729113, + "grounded_proportion": 0.5, + "kl": 0.042236328125, + "learning_rate": 8.516833484986351e-07, + "loss": 0.0017, + "over_lengthy_sequences": 0.0, + "reward": 1.4140625, + "reward_std": 0.2548676133155823, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4140625, + "step": 164 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.08333333333333333, + "TT_Detection/mode_0": 0.4166666666666667, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.375, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6166666666666667, + "TT_Math/mode_1": 0.5333333333333333, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.25, + "completion_length": 306.5078125, + "completion_length/mode_0": 308.4765625, + "completion_length/mode_1": 304.5390625, + "epoch": 0.15013648771610555, + "format_confidence": 0.5, + "grad_norm": 1.4513886148665085, + "grounded_proportion": 0.5, + "kl": 0.039306640625, + "learning_rate": 8.507734303912647e-07, + "loss": 0.0016, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.3153514266014099, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 165 + }, + { + "TT_Chart/mode_0": 0.625, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.08333333333333333, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.36538461538461536, + "TT_Math/mode_1": 0.40384615384615385, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.16666666666666666, + "TT_Others/mode_1": 0.3333333333333333, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.08333333333333333, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 286.8515625, + "completion_length/mode_0": 297.140625, + "completion_length/mode_1": 276.5625, + "epoch": 0.1510464058234759, + "format_confidence": 0.5, + "grad_norm": 1.232171973166691, + "grounded_proportion": 0.5, + "kl": 0.0673828125, + "learning_rate": 8.498635122838944e-07, + "loss": 0.0027, + "over_lengthy_sequences": 0.0, + "reward": 1.2578125, + "reward_std": 0.3183930218219757, + "rewards/format_reward": 0.98828125, + "rewards/general_task_reward": 0.26953125, + "step": 166 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.4166666666666667, + "TT_Detection/mode_1": 0.4166666666666667, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5833333333333334, + "TT_Math/mode_1": 0.5277777777777778, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.0, + "TT_Puzzle/mode_1": 0.0, + "TT_Science/mode_0": 0.08333333333333333, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 284.21484375, + "completion_length/mode_0": 311.2109375, + "completion_length/mode_1": 257.21875, + "epoch": 0.15195632393084624, + "format_confidence": 0.5, + "grad_norm": 0.9435576850130559, + "grounded_proportion": 0.5, + "kl": 0.050048828125, + "learning_rate": 8.489535941765242e-07, + "loss": 0.002, + "over_lengthy_sequences": 0.0, + "reward": 1.4140625, + "reward_std": 0.2546031177043915, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4140625, + "step": 167 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.3125, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.3977272727272727, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 422.70703125, + "completion_length/mode_0": 433.296875, + "completion_length/mode_1": 412.1171875, + "epoch": 0.15286624203821655, + "format_confidence": 0.5, + "grad_norm": 0.6448597730106973, + "grounded_proportion": 0.5, + "kl": 0.037109375, + "learning_rate": 8.480436760691537e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.37109375, + "reward_std": 0.27813929319381714, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.37109375, + "step": 168 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.4375, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.609375, + "TT_Math/mode_1": 0.578125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.08333333333333333, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 337.109375, + "completion_length/mode_0": 346.390625, + "completion_length/mode_1": 327.828125, + "epoch": 0.1537761601455869, + "format_confidence": 0.5, + "grad_norm": 0.7151475312153581, + "grounded_proportion": 0.5, + "kl": 0.044921875, + "learning_rate": 8.471337579617834e-07, + "loss": 0.0018, + "over_lengthy_sequences": 0.0, + "reward": 1.39453125, + "reward_std": 0.229889914393425, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.39453125, + "step": 169 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.1875, + "TT_Counting/mode_0": 1.0, + "TT_Counting/mode_1": 1.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.28125, + "TT_Math/mode_1": 0.296875, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.3, + "completion_length": 304.55078125, + "completion_length/mode_0": 310.8515625, + "completion_length/mode_1": 298.25, + "epoch": 0.15468607825295724, + "format_confidence": 0.5, + "grad_norm": 0.5636664720948796, + "grounded_proportion": 0.5, + "kl": 0.03173828125, + "learning_rate": 8.462238398544131e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.30078125, + "reward_std": 0.17939773201942444, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.30078125, + "step": 170 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5555555555555556, + "TT_Math/mode_1": 0.5555555555555556, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5833333333333334, + "TT_Science/mode_1": 0.5, + "completion_length": 359.69140625, + "completion_length/mode_0": 369.3984375, + "completion_length/mode_1": 349.984375, + "epoch": 0.15559599636032756, + "format_confidence": 0.5, + "grad_norm": 1.0708410982269836, + "grounded_proportion": 0.5, + "kl": 0.033935546875, + "learning_rate": 8.453139217470428e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.52734375, + "reward_std": 0.23448191583156586, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.52734375, + "step": 171 + }, + { + "TT_Chart/mode_0": 0.4166666666666667, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.4852941176470588, + "TT_Math/mode_1": 0.29411764705882354, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.9166666666666666, + "TT_Science/mode_1": 0.8333333333333334, + "completion_length": 343.12109375, + "completion_length/mode_0": 352.09375, + "completion_length/mode_1": 334.1484375, + "epoch": 0.1565059144676979, + "format_confidence": 0.5, + "grad_norm": 0.9973888113787986, + "grounded_proportion": 0.5, + "kl": 0.046142578125, + "learning_rate": 8.444040036396723e-07, + "loss": 0.0018, + "over_lengthy_sequences": 0.0, + "reward": 1.3828125, + "reward_std": 0.2741939425468445, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3828125, + "step": 172 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 1.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.5096153846153846, + "TT_Math/mode_1": 0.5288461538461539, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 1.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 492.6171875, + "completion_length/mode_0": 512.953125, + "completion_length/mode_1": 472.28125, + "epoch": 0.15741583257506825, + "format_confidence": 0.5, + "grad_norm": 0.8266362533405296, + "grounded_proportion": 0.5, + "kl": 0.02978515625, + "learning_rate": 8.434940855323021e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.48046875, + "reward_std": 0.3358907699584961, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.484375, + "step": 173 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6547619047619048, + "TT_Math/mode_1": 0.6190476190476191, + "TT_OCR/mode_0": 0.75, + "TT_OCR/mode_1": 0.25, + "TT_Others/mode_0": 1.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.0, + "completion_length": 374.87890625, + "completion_length/mode_0": 395.5234375, + "completion_length/mode_1": 354.234375, + "epoch": 0.1583257506824386, + "format_confidence": 0.5, + "grad_norm": 0.6723817621947202, + "grounded_proportion": 0.5, + "kl": 0.0294189453125, + "learning_rate": 8.425841674249318e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.50390625, + "reward_std": 0.2964407503604889, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.50390625, + "step": 174 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.5833333333333334, + "TT_Counting/mode_1": 0.6666666666666666, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.65, + "TT_Math/mode_1": 0.575, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.125, + "completion_length": 386.2890625, + "completion_length/mode_0": 403.7734375, + "completion_length/mode_1": 368.8046875, + "epoch": 0.1592356687898089, + "format_confidence": 0.5, + "grad_norm": 0.6645566999099587, + "grounded_proportion": 0.5, + "kl": 0.0289306640625, + "learning_rate": 8.416742493175613e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.5546875, + "reward_std": 0.2982693314552307, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.5546875, + "step": 175 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.6666666666666666, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5952380952380952, + "TT_Math/mode_1": 0.5952380952380952, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.125, + "completion_length": 376.30859375, + "completion_length/mode_0": 382.4453125, + "completion_length/mode_1": 370.171875, + "epoch": 0.16014558689717925, + "format_confidence": 0.5, + "grad_norm": 0.7597524598153307, + "grounded_proportion": 0.5, + "kl": 0.02978515625, + "learning_rate": 8.407643312101911e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.5078125, + "reward_std": 0.23277443647384644, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.5078125, + "step": 176 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0625, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.7142857142857143, + "TT_Math/mode_1": 0.6607142857142857, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.25, + "completion_length": 353.76953125, + "completion_length/mode_0": 389.421875, + "completion_length/mode_1": 318.1171875, + "epoch": 0.1610555050045496, + "format_confidence": 0.5, + "grad_norm": 1.1293887912044984, + "grounded_proportion": 0.5, + "kl": 0.05078125, + "learning_rate": 8.398544131028207e-07, + "loss": 0.002, + "over_lengthy_sequences": 0.0, + "reward": 1.3828125, + "reward_std": 0.2761722505092621, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3828125, + "step": 177 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.7, + "TT_Counting/mode_1": 0.7, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.6022727272727273, + "TT_Math/mode_1": 0.5681818181818182, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.0, + "completion_length": 306.37109375, + "completion_length/mode_0": 315.796875, + "completion_length/mode_1": 296.9453125, + "epoch": 0.16196542311191992, + "format_confidence": 0.5, + "grad_norm": 0.7974369345469358, + "grounded_proportion": 0.5, + "kl": 0.033447265625, + "learning_rate": 8.389444949954503e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.5234375, + "reward_std": 0.2797393798828125, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.5234375, + "step": 178 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.35, + "TT_Math/mode_0": 0.5441176470588235, + "TT_Math/mode_1": 0.47058823529411764, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.375, + "completion_length": 322.7109375, + "completion_length/mode_0": 335.171875, + "completion_length/mode_1": 310.25, + "epoch": 0.16287534121929026, + "format_confidence": 0.5, + "grad_norm": 0.8987778065024237, + "grounded_proportion": 0.5, + "kl": 0.034912109375, + "learning_rate": 8.3803457688808e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.41796875, + "reward_std": 0.2861511707305908, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41796875, + "step": 179 + }, + { + "TT_Chart/mode_0": 0.25, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5263157894736842, + "TT_Math/mode_1": 0.5394736842105263, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.15, + "TT_Others/mode_1": 0.35, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.375, + "completion_length": 348.140625, + "completion_length/mode_0": 359.0703125, + "completion_length/mode_1": 337.2109375, + "epoch": 0.1637852593266606, + "format_confidence": 0.5, + "grad_norm": 0.7343429231062427, + "grounded_proportion": 0.5, + "kl": 0.03515625, + "learning_rate": 8.371246587807097e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.2685386538505554, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 180 + }, + { + "TT_Chart/mode_0": 0.75, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5833333333333334, + "TT_Detection/mode_0": 0.5, + "TT_Detection/mode_1": 0.875, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5921052631578947, + "TT_Math/mode_1": 0.5131578947368421, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.75, + "TT_Others/mode_1": 0.6875, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.375, + "completion_length": 364.34765625, + "completion_length/mode_0": 393.75, + "completion_length/mode_1": 334.9453125, + "epoch": 0.16469517743403095, + "format_confidence": 0.5, + "grad_norm": 1.3940683409897012, + "grounded_proportion": 0.5, + "kl": 0.024658203125, + "learning_rate": 8.362147406733395e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.546875, + "reward_std": 0.37559884786605835, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.546875, + "step": 181 + }, + { + "TT_Chart/mode_0": 0.15, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5125, + "TT_Math/mode_1": 0.475, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.75, + "TT_Science/mode_1": 0.5, + "completion_length": 342.953125, + "completion_length/mode_0": 368.9765625, + "completion_length/mode_1": 316.9296875, + "epoch": 0.16560509554140126, + "format_confidence": 0.5, + "grad_norm": 0.7261784826800016, + "grounded_proportion": 0.5, + "kl": 0.0245361328125, + "learning_rate": 8.35304822565969e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.41015625, + "reward_std": 0.2590813636779785, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.41015625, + "step": 182 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.08333333333333333, + "TT_Counting/mode_0": 0.5, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.5833333333333334, + "TT_Detection/mode_1": 0.5833333333333334, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.38235294117647056, + "TT_Math/mode_1": 0.39705882352941174, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.16666666666666666, + "TT_Science/mode_1": 0.16666666666666666, + "completion_length": 321.29296875, + "completion_length/mode_0": 325.5390625, + "completion_length/mode_1": 317.046875, + "epoch": 0.1665150136487716, + "format_confidence": 0.5, + "grad_norm": 0.690692859788369, + "grounded_proportion": 0.5, + "kl": 0.02587890625, + "learning_rate": 8.343949044585987e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.00390625, + "reward": 1.31640625, + "reward_std": 0.26170387864112854, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.3203125, + "step": 183 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.6666666666666666, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.6666666666666666, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.625, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.6029411764705882, + "TT_Math/mode_1": 0.5147058823529411, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.125, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.5, + "completion_length": 299.9453125, + "completion_length/mode_0": 306.9921875, + "completion_length/mode_1": 292.8984375, + "epoch": 0.16742493175614195, + "format_confidence": 0.5, + "grad_norm": 0.943115785852982, + "grounded_proportion": 0.5, + "kl": 0.033447265625, + "learning_rate": 8.334849863512284e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.53125, + "reward_std": 0.30076679587364197, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.53125, + "step": 184 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.35, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.125, + "TT_Grounding/mode_1": 0.375, + "TT_Math/mode_0": 0.5694444444444444, + "TT_Math/mode_1": 0.5416666666666666, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.125, + "completion_length": 309.7265625, + "completion_length/mode_0": 322.5703125, + "completion_length/mode_1": 296.8828125, + "epoch": 0.16833484986351227, + "format_confidence": 0.5, + "grad_norm": 0.8074492550195393, + "grounded_proportion": 0.5, + "kl": 0.03271484375, + "learning_rate": 8.32575068243858e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.38671875, + "reward_std": 0.28630331158638, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.38671875, + "step": 185 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.8333333333333334, + "TT_Counting/mode_0": 0.08333333333333333, + "TT_Counting/mode_1": 0.16666666666666666, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.25, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.515625, + "TT_Math/mode_1": 0.484375, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.25, + "TT_Others/mode_1": 0.08333333333333333, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.125, + "TT_Science/mode_1": 0.25, + "completion_length": 353.81640625, + "completion_length/mode_0": 383.046875, + "completion_length/mode_1": 324.5859375, + "epoch": 0.16924476797088261, + "format_confidence": 0.5, + "grad_norm": 3.2195277238429063, + "grounded_proportion": 0.5, + "kl": 0.035400390625, + "learning_rate": 8.316651501364876e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.0, + "reward": 1.39453125, + "reward_std": 0.30312827229499817, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.39453125, + "step": 186 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.25, + "TT_Counting/mode_0": 0.3333333333333333, + "TT_Counting/mode_1": 0.75, + "TT_Detection/mode_0": 0.625, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.125, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.35, + "TT_Grounding/mode_1": 0.45, + "TT_Math/mode_0": 0.578125, + "TT_Math/mode_1": 0.5, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.875, + "completion_length": 340.77734375, + "completion_length/mode_0": 350.6328125, + "completion_length/mode_1": 330.921875, + "epoch": 0.17015468607825296, + "format_confidence": 0.5, + "grad_norm": 1.0584895132267007, + "grounded_proportion": 0.5, + "kl": 0.0311279296875, + "learning_rate": 8.307552320291174e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.4609375, + "reward_std": 0.35466182231903076, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.46484375, + "step": 187 + }, + { + "TT_Chart/mode_0": 0.1875, + "TT_Chart/mode_1": 0.1875, + "TT_Counting/mode_0": 0.8125, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.625, + "TT_Detection/mode_1": 0.25, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.35, + "TT_Grounding/mode_1": 0.45, + "TT_Math/mode_0": 0.546875, + "TT_Math/mode_1": 0.453125, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.6, + "TT_Science/mode_1": 0.45, + "completion_length": 340.80859375, + "completion_length/mode_0": 366.453125, + "completion_length/mode_1": 315.1640625, + "epoch": 0.1710646041856233, + "format_confidence": 0.5, + "grad_norm": 1.7060635808674018, + "grounded_proportion": 0.5, + "kl": 0.03271484375, + "learning_rate": 8.298453139217471e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.49609375, + "reward_std": 0.2823604345321655, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.49609375, + "step": 188 + }, + { + "TT_Chart/mode_0": 0.5, + "TT_Chart/mode_1": 0.45, + "TT_Counting/mode_0": 0.125, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.5833333333333334, + "TT_Grounding/mode_1": 0.5833333333333334, + "TT_Math/mode_0": 0.618421052631579, + "TT_Math/mode_1": 0.5789473684210527, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 1.0, + "TT_Others/mode_1": 1.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.25, + "completion_length": 442.46875, + "completion_length/mode_0": 476.4296875, + "completion_length/mode_1": 408.5078125, + "epoch": 0.17197452229299362, + "format_confidence": 0.5, + "grad_norm": 1.1713295543987994, + "grounded_proportion": 0.5, + "kl": 0.0224609375, + "learning_rate": 8.289353958143766e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.53515625, + "reward_std": 0.30076926946640015, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.53515625, + "step": 189 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.375, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.3333333333333333, + "TT_Detection/mode_1": 0.3333333333333333, + "TT_Document/mode_0": 0.0, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.375, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.475, + "TT_OCR/mode_0": 1.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.05, + "TT_Others/mode_1": 0.15, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.55, + "TT_Science/mode_1": 0.4, + "completion_length": 260.03125, + "completion_length/mode_0": 268.5, + "completion_length/mode_1": 251.5625, + "epoch": 0.17288444040036396, + "format_confidence": 0.5, + "grad_norm": 1.7958755613959498, + "grounded_proportion": 0.5, + "kl": 0.03857421875, + "learning_rate": 8.280254777070064e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.32421875, + "reward_std": 0.2951115369796753, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.32421875, + "step": 190 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.0, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.25, + "TT_Math/mode_0": 0.5125, + "TT_Math/mode_1": 0.475, + "TT_OCR/mode_0": 0.875, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.25, + "TT_Science/mode_1": 0.25, + "completion_length": 342.26171875, + "completion_length/mode_0": 358.6328125, + "completion_length/mode_1": 325.890625, + "epoch": 0.1737943585077343, + "format_confidence": 0.5, + "grad_norm": 1.119909562752138, + "grounded_proportion": 0.5, + "kl": 0.036376953125, + "learning_rate": 8.27115559599636e-07, + "loss": 0.0015, + "over_lengthy_sequences": 0.0, + "reward": 1.421875, + "reward_std": 0.29906177520751953, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.421875, + "step": 191 + }, + { + "TT_Chart/mode_0": 0.375, + "TT_Chart/mode_1": 0.375, + "TT_Counting/mode_0": 0.4166666666666667, + "TT_Counting/mode_1": 0.25, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.75, + "TT_Math/mode_0": 0.4722222222222222, + "TT_Math/mode_1": 0.4305555555555556, + "TT_OCR/mode_0": 0.625, + "TT_OCR/mode_1": 0.125, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 397.421875, + "completion_length/mode_0": 413.5703125, + "completion_length/mode_1": 381.2734375, + "epoch": 0.17470427661510465, + "format_confidence": 0.5, + "grad_norm": 1.2286906019231645, + "grounded_proportion": 0.5, + "kl": 0.0311279296875, + "learning_rate": 8.262056414922656e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.00390625, + "reward": 1.41015625, + "reward_std": 0.32666200399398804, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.4140625, + "step": 192 + }, + { + "TT_Chart/mode_0": 0.35, + "TT_Chart/mode_1": 0.5, + "TT_Counting/mode_0": 0.625, + "TT_Counting/mode_1": 0.375, + "TT_Detection/mode_0": 0.6666666666666666, + "TT_Detection/mode_1": 0.6666666666666666, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.6666666666666666, + "TT_Grounding/mode_1": 0.6666666666666666, + "TT_Math/mode_0": 0.4, + "TT_Math/mode_1": 0.45, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.25, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.5, + "completion_length": 359.20703125, + "completion_length/mode_0": 382.375, + "completion_length/mode_1": 336.0390625, + "epoch": 0.17561419472247497, + "format_confidence": 0.5, + "grad_norm": 0.909737373388406, + "grounded_proportion": 0.5, + "kl": 0.033447265625, + "learning_rate": 8.252957233848953e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.4765625, + "reward_std": 0.3333781063556671, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.4765625, + "step": 193 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.75, + "TT_Counting/mode_1": 0.625, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 0.75, + "TT_Document/mode_1": 0.25, + "TT_Grounding/mode_0": 0.0, + "TT_Grounding/mode_1": 0.0, + "TT_Math/mode_0": 0.5227272727272727, + "TT_Math/mode_1": 0.4431818181818182, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.0, + "completion_length": 405.328125, + "completion_length/mode_0": 413.921875, + "completion_length/mode_1": 396.734375, + "epoch": 0.17652411282984531, + "format_confidence": 0.5, + "grad_norm": 1.000256041324093, + "grounded_proportion": 0.5, + "kl": 0.033935546875, + "learning_rate": 8.24385805277525e-07, + "loss": 0.0014, + "over_lengthy_sequences": 0.00390625, + "reward": 1.390625, + "reward_std": 0.31116873025894165, + "rewards/format_reward": 0.99609375, + "rewards/general_task_reward": 0.39453125, + "step": 194 + }, + { + "TT_Chart/mode_0": 0.041666666666666664, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.75, + "TT_Detection/mode_1": 0.75, + "TT_Document/mode_0": 0.5, + "TT_Document/mode_1": 0.0, + "TT_Grounding/mode_0": 0.625, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.4264705882352941, + "TT_Math/mode_1": 0.5735294117647058, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.375, + "TT_Science/mode_1": 0.125, + "completion_length": 341.66015625, + "completion_length/mode_0": 353.7109375, + "completion_length/mode_1": 329.609375, + "epoch": 0.17743403093721566, + "format_confidence": 0.5, + "grad_norm": 0.5359895637363522, + "grounded_proportion": 0.5, + "kl": 0.023681640625, + "learning_rate": 8.234758871701548e-07, + "loss": 0.0009, + "over_lengthy_sequences": 0.0, + "reward": 1.3515625, + "reward_std": 0.2563130855560303, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3515625, + "step": 195 + }, + { + "TT_Chart/mode_0": 0.4166666666666667, + "TT_Chart/mode_1": 0.16666666666666666, + "TT_Counting/mode_0": 0.6666666666666666, + "TT_Counting/mode_1": 0.6666666666666666, + "TT_Detection/mode_0": 0.0, + "TT_Detection/mode_1": 0.125, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.1875, + "TT_Grounding/mode_1": 0.1875, + "TT_Math/mode_0": 0.46875, + "TT_Math/mode_1": 0.453125, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 0.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 1.0, + "TT_Science/mode_1": 0.75, + "completion_length": 283.7734375, + "completion_length/mode_0": 304.7109375, + "completion_length/mode_1": 262.8359375, + "epoch": 0.17834394904458598, + "format_confidence": 0.5, + "grad_norm": 9.681747982191952, + "grounded_proportion": 0.5, + "kl": 0.0322265625, + "learning_rate": 8.225659690627843e-07, + "loss": 0.0013, + "over_lengthy_sequences": 0.0, + "reward": 1.3984375, + "reward_std": 0.28630968928337097, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3984375, + "step": 196 + }, + { + "TT_Chart/mode_0": 1.0, + "TT_Chart/mode_1": 1.0, + "TT_Counting/mode_0": 0.25, + "TT_Counting/mode_1": 0.5, + "TT_Detection/mode_0": 0.25, + "TT_Detection/mode_1": 0.16666666666666666, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 0.5, + "TT_Grounding/mode_0": 0.5, + "TT_Grounding/mode_1": 0.5, + "TT_Math/mode_0": 0.35714285714285715, + "TT_Math/mode_1": 0.30952380952380953, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.5, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.0, + "TT_Science/mode_1": 0.25, + "completion_length": 424.5859375, + "completion_length/mode_0": 437.5, + "completion_length/mode_1": 411.671875, + "epoch": 0.17925386715195632, + "format_confidence": 0.5, + "grad_norm": 0.5628841267736974, + "grounded_proportion": 0.5, + "kl": 0.0281982421875, + "learning_rate": 8.21656050955414e-07, + "loss": 0.0011, + "over_lengthy_sequences": 0.0, + "reward": 1.3515625, + "reward_std": 0.23250606656074524, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3515625, + "step": 197 + }, + { + "TT_Chart/mode_0": 0.08333333333333333, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.125, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.5, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.625, + "TT_Grounding/mode_1": 0.875, + "TT_Math/mode_0": 0.4722222222222222, + "TT_Math/mode_1": 0.4861111111111111, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.4166666666666667, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.5, + "TT_Science/mode_1": 0.0, + "completion_length": 304.98046875, + "completion_length/mode_0": 306.78125, + "completion_length/mode_1": 303.1796875, + "epoch": 0.18016378525932666, + "format_confidence": 0.5, + "grad_norm": 1.2573499359794245, + "grounded_proportion": 0.5, + "kl": 0.0247802734375, + "learning_rate": 8.207461328480437e-07, + "loss": 0.001, + "over_lengthy_sequences": 0.0, + "reward": 1.42578125, + "reward_std": 0.24461543560028076, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.42578125, + "step": 198 + }, + { + "TT_Chart/mode_0": 0.6666666666666666, + "TT_Chart/mode_1": 0.6666666666666666, + "TT_Counting/mode_0": 0.0, + "TT_Counting/mode_1": 0.0, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 1.0, + "TT_Document/mode_1": 1.0, + "TT_Grounding/mode_0": 0.16666666666666666, + "TT_Grounding/mode_1": 0.16666666666666666, + "TT_Math/mode_0": 0.5, + "TT_Math/mode_1": 0.5333333333333333, + "TT_OCR/mode_0": 0.5, + "TT_OCR/mode_1": 0.375, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.0, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.625, + "TT_Science/mode_1": 0.375, + "completion_length": 366.18359375, + "completion_length/mode_0": 374.296875, + "completion_length/mode_1": 358.0703125, + "epoch": 0.181073703366697, + "format_confidence": 0.5, + "grad_norm": 0.763912879776671, + "grounded_proportion": 0.5, + "kl": 0.0439453125, + "learning_rate": 8.198362147406733e-07, + "loss": 0.0018, + "over_lengthy_sequences": 0.0, + "reward": 1.45703125, + "reward_std": 0.3085215389728546, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.45703125, + "step": 199 + }, + { + "TT_Chart/mode_0": 0.0, + "TT_Chart/mode_1": 0.0, + "TT_Counting/mode_0": 0.4375, + "TT_Counting/mode_1": 0.4375, + "TT_Detection/mode_0": 0.125, + "TT_Detection/mode_1": 0.625, + "TT_Document/mode_0": 0.25, + "TT_Document/mode_1": 0.75, + "TT_Grounding/mode_0": 1.0, + "TT_Grounding/mode_1": 1.0, + "TT_Math/mode_0": 0.39285714285714285, + "TT_Math/mode_1": 0.40476190476190477, + "TT_OCR/mode_0": 0.0, + "TT_OCR/mode_1": 1.0, + "TT_Others/mode_0": 0.0, + "TT_Others/mode_1": 0.125, + "TT_Puzzle/mode_0": 0.75, + "TT_Puzzle/mode_1": 0.5, + "TT_Science/mode_0": 0.625, + "TT_Science/mode_1": 0.375, + "completion_length": 342.05859375, + "completion_length/mode_0": 350.3359375, + "completion_length/mode_1": 333.78125, + "epoch": 0.18198362147406733, + "format_confidence": 0.5, + "grad_norm": 1.010417481464837, + "grounded_proportion": 0.5, + "kl": 0.0294189453125, + "learning_rate": 8.189262966333029e-07, + "loss": 0.0012, + "over_lengthy_sequences": 0.0, + "reward": 1.3828125, + "reward_std": 0.2797393798828125, + "rewards/format_reward": 1.0, + "rewards/general_task_reward": 0.3828125, + "step": 200 + } + ], + "logging_steps": 1.0, + "max_steps": 1099, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}