{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.18198362147406733, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0625, "TT_Grounding/mode_1": 0.3125, "TT_Math/mode_0": 0.4605263157894737, "TT_Math/mode_1": 0.4605263157894737, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 377.5625, "completion_length/mode_0": 383.5234375, "completion_length/mode_1": 371.6015625, "epoch": 0.0009099181073703367, "format_confidence": 0.5, "grad_norm": 1.5429338116205915, "grounded_proportion": 0.5, "kl": 0.0, "learning_rate": 1e-06, "loss": 0.0, "over_lengthy_sequences": 0.00390625, "reward": 1.3203125, "reward_std": 0.33722585439682007, "rewards/format_reward": 0.9765625, "rewards/general_task_reward": 0.34375, "step": 1 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5833333333333334, "TT_Counting/mode_0": 0.0625, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.40384615384615385, "TT_Math/mode_1": 0.38461538461538464, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.4375, "completion_length": 304.2890625, "completion_length/mode_0": 337.1171875, "completion_length/mode_1": 271.4609375, "epoch": 0.0018198362147406734, "format_confidence": 0.5, "grad_norm": 1.1197813674763613, "grounded_proportion": 0.5, "kl": 0.000415802001953125, "learning_rate": 9.990900818926296e-07, "loss": 0.0, "over_lengthy_sequences": 0.0, "reward": 1.375, "reward_std": 0.2749903202056885, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.375, "step": 2 }, { "TT_Chart/mode_0": 0.9166666666666666, "TT_Chart/mode_1": 0.9166666666666666, "TT_Counting/mode_0": 0.0625, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.625, "TT_Math/mode_1": 0.5340909090909091, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.375, "completion_length": 367.0625, "completion_length/mode_0": 395.9765625, "completion_length/mode_1": 338.1484375, "epoch": 0.00272975432211101, "format_confidence": 0.5, "grad_norm": 2.0139122368418265, "grounded_proportion": 0.5, "kl": 0.0003833770751953125, "learning_rate": 9.981801637852592e-07, "loss": 0.0, "over_lengthy_sequences": 0.0, "reward": 1.5078125, "reward_std": 0.33350804448127747, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.515625, "step": 3 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.703125, "TT_Math/mode_1": 0.59375, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.375, "completion_length": 311.7890625, "completion_length/mode_0": 331.875, "completion_length/mode_1": 291.703125, "epoch": 0.003639672429481347, "format_confidence": 0.5, "grad_norm": 0.9407069347152167, "grounded_proportion": 0.5, "kl": 0.0003643035888671875, "learning_rate": 9.97270245677889e-07, "loss": 0.0, "over_lengthy_sequences": 0.0, "reward": 1.3828125, "reward_std": 0.27103859186172485, "rewards/format_reward": 0.98046875, "rewards/general_task_reward": 0.40234375, "step": 4 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.5625, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.36764705882352944, "TT_Math/mode_1": 0.2647058823529412, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.375, "completion_length": 366.9765625, "completion_length/mode_0": 407.4140625, "completion_length/mode_1": 326.5390625, "epoch": 0.004549590536851683, "format_confidence": 0.5, "grad_norm": 0.9635797377965721, "grounded_proportion": 0.5, "kl": 0.00054931640625, "learning_rate": 9.963603275705185e-07, "loss": 0.0, "over_lengthy_sequences": 0.00390625, "reward": 1.32421875, "reward_std": 0.26956743001937866, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.33203125, "step": 5 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.3333333333333333, "TT_Document/mode_1": 0.08333333333333333, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4, "TT_Math/mode_1": 0.48333333333333334, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.75, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.5, "completion_length": 322.3125, "completion_length/mode_0": 345.171875, "completion_length/mode_1": 299.453125, "epoch": 0.00545950864422202, "format_confidence": 0.5, "grad_norm": 1.073086357937483, "grounded_proportion": 0.5, "kl": 0.000858306884765625, "learning_rate": 9.954504094631483e-07, "loss": 0.0, "over_lengthy_sequences": 0.0, "reward": 1.3046875, "reward_std": 0.3800785541534424, "rewards/format_reward": 0.98046875, "rewards/general_task_reward": 0.32421875, "step": 6 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5588235294117647, "TT_Math/mode_1": 0.4264705882352941, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.25, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.625, "TT_Science/mode_1": 0.25, "completion_length": 292.5859375, "completion_length/mode_0": 318.9609375, "completion_length/mode_1": 266.2109375, "epoch": 0.006369426751592357, "format_confidence": 0.5, "grad_norm": 0.8168275363403603, "grounded_proportion": 0.5, "kl": 0.0014495849609375, "learning_rate": 9.94540491355778e-07, "loss": 0.0001, "over_lengthy_sequences": 0.0, "reward": 1.40625, "reward_std": 0.32825323939323425, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40625, "step": 7 }, { "TT_Chart/mode_0": 0.15, "TT_Chart/mode_1": 0.15, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.16666666666666666, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.65, "TT_Math/mode_1": 0.525, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0625, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.375, "completion_length": 350.88671875, "completion_length/mode_0": 379.875, "completion_length/mode_1": 321.8984375, "epoch": 0.007279344858962694, "format_confidence": 0.5, "grad_norm": 0.9037954457959693, "grounded_proportion": 0.5, "kl": 0.000934600830078125, "learning_rate": 9.936305732484076e-07, "loss": 0.0, "over_lengthy_sequences": 0.0, "reward": 1.34375, "reward_std": 0.3212430477142334, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.35546875, "step": 8 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.5454545454545454, "TT_Math/mode_1": 0.5227272727272727, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0625, "TT_Science/mode_1": 0.375, "completion_length": 373.3203125, "completion_length/mode_0": 413.8046875, "completion_length/mode_1": 332.8359375, "epoch": 0.00818926296633303, "format_confidence": 0.5, "grad_norm": 1.5911208325130124, "grounded_proportion": 0.5, "kl": 0.001251220703125, "learning_rate": 9.927206551410372e-07, "loss": 0.0001, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.3262837529182434, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.43359375, "step": 9 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.5833333333333334, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5833333333333334, "TT_Math/mode_0": 0.5357142857142857, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0625, "TT_Science/mode_1": 0.375, "completion_length": 372.32421875, "completion_length/mode_0": 402.265625, "completion_length/mode_1": 342.3828125, "epoch": 0.009099181073703366, "format_confidence": 0.5, "grad_norm": 0.9600405491472309, "grounded_proportion": 0.5, "kl": 0.0033721923828125, "learning_rate": 9.918107370336669e-07, "loss": 0.0001, "over_lengthy_sequences": 0.0, "reward": 1.4375, "reward_std": 0.270779013633728, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4375, "step": 10 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.375, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.47368421052631576, "TT_Math/mode_1": 0.39473684210526316, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.875, "TT_Science/mode_1": 0.625, "completion_length": 387.62109375, "completion_length/mode_0": 418.53125, "completion_length/mode_1": 356.7109375, "epoch": 0.010009099181073703, "format_confidence": 0.5, "grad_norm": 0.8976670020100668, "grounded_proportion": 0.5, "kl": 0.0032501220703125, "learning_rate": 9.909008189262967e-07, "loss": 0.0001, "over_lengthy_sequences": 0.0, "reward": 1.40625, "reward_std": 0.24920988082885742, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40625, "step": 11 }, { "TT_Chart/mode_0": 0.15, "TT_Chart/mode_1": 0.3, "TT_Counting/mode_0": 0.1875, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.375, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.375, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.546875, "TT_Math/mode_1": 0.484375, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.5, "completion_length": 373.30859375, "completion_length/mode_0": 404.21875, "completion_length/mode_1": 342.3984375, "epoch": 0.01091901728844404, "format_confidence": 0.5, "grad_norm": 0.7415423083376748, "grounded_proportion": 0.5, "kl": 0.006988525390625, "learning_rate": 9.899909008189261e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.3828125, "reward_std": 0.29287609457969666, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3828125, "step": 12 }, { "TT_Chart/mode_0": 0.3125, "TT_Chart/mode_1": 0.4375, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4666666666666667, "TT_Math/mode_1": 0.55, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 1.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.2, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 272.83984375, "completion_length/mode_0": 305.1953125, "completion_length/mode_1": 240.484375, "epoch": 0.011828935395814377, "format_confidence": 0.5, "grad_norm": 1.416444491434725, "grounded_proportion": 0.5, "kl": 0.01019287109375, "learning_rate": 9.89080982711556e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.2200184315443039, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.37109375, "step": 13 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.875, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.08333333333333333, "TT_Math/mode_0": 0.5166666666666667, "TT_Math/mode_1": 0.43333333333333335, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 334.30859375, "completion_length/mode_0": 376.7578125, "completion_length/mode_1": 291.859375, "epoch": 0.012738853503184714, "format_confidence": 0.5, "grad_norm": 1.2195596640953166, "grounded_proportion": 0.5, "kl": 0.0172119140625, "learning_rate": 9.881710646041856e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.33984375, "reward_std": 0.25836920738220215, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.34375, "step": 14 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0625, "TT_Document/mode_1": 0.1875, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.5454545454545454, "TT_Math/mode_1": 0.4431818181818182, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 350.69921875, "completion_length/mode_0": 371.78125, "completion_length/mode_1": 329.6171875, "epoch": 0.01364877161055505, "format_confidence": 0.5, "grad_norm": 0.9960620613889194, "grounded_proportion": 0.5, "kl": 0.025146484375, "learning_rate": 9.872611464968153e-07, "loss": 0.001, "over_lengthy_sequences": 0.00390625, "reward": 1.39453125, "reward_std": 0.29446732997894287, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3984375, "step": 15 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.15, "TT_Grounding/mode_1": 0.35, "TT_Math/mode_0": 0.5294117647058824, "TT_Math/mode_1": 0.5147058823529411, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 351.96484375, "completion_length/mode_0": 384.609375, "completion_length/mode_1": 319.3203125, "epoch": 0.014558689717925387, "format_confidence": 0.5, "grad_norm": 0.9260258786381415, "grounded_proportion": 0.5, "kl": 0.0201416015625, "learning_rate": 9.863512283894449e-07, "loss": 0.0008, "over_lengthy_sequences": 0.0, "reward": 1.3359375, "reward_std": 0.19503436982631683, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3359375, "step": 16 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.45, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.47619047619047616, "TT_Math/mode_1": 0.42857142857142855, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.25, "completion_length": 368.91796875, "completion_length/mode_0": 401.9609375, "completion_length/mode_1": 335.875, "epoch": 0.015468607825295723, "format_confidence": 0.5, "grad_norm": 0.8885940593044405, "grounded_proportion": 0.5, "kl": 0.007049560546875, "learning_rate": 9.854413102820745e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.2695994973182678, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.390625, "step": 17 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.16666666666666666, "TT_Counting/mode_1": 0.16666666666666666, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.16666666666666666, "TT_Document/mode_1": 0.16666666666666666, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.5384615384615384, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.75, "TT_Science/mode_0": 0.625, "TT_Science/mode_1": 0.75, "completion_length": 266.00390625, "completion_length/mode_0": 279.5390625, "completion_length/mode_1": 252.46875, "epoch": 0.01637852593266606, "format_confidence": 0.5, "grad_norm": 1.4954519102348234, "grounded_proportion": 0.5, "kl": 0.00970458984375, "learning_rate": 9.845313921747044e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.30023884773254395, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.37109375, "step": 18 }, { "TT_Chart/mode_0": 0.3333333333333333, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.375, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5192307692307693, "TT_Math/mode_1": 0.4423076923076923, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 305.93359375, "completion_length/mode_0": 336.4140625, "completion_length/mode_1": 275.453125, "epoch": 0.017288444040036398, "format_confidence": 0.5, "grad_norm": 1.2648060156416405, "grounded_proportion": 0.5, "kl": 0.00762939453125, "learning_rate": 9.836214740673338e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.3359375, "reward_std": 0.22764958441257477, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3359375, "step": 19 }, { "TT_Chart/mode_0": 0.3333333333333333, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.8333333333333334, "TT_Document/mode_1": 0.16666666666666666, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.4, "TT_Math/mode_0": 0.45588235294117646, "TT_Math/mode_1": 0.4117647058823529, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.375, "completion_length": 350.97265625, "completion_length/mode_0": 366.796875, "completion_length/mode_1": 335.1484375, "epoch": 0.018198362147406732, "format_confidence": 0.5, "grad_norm": 2.2680753597810743, "grounded_proportion": 0.5, "kl": 0.01123046875, "learning_rate": 9.827115559599636e-07, "loss": 0.0004, "over_lengthy_sequences": 0.00390625, "reward": 1.3515625, "reward_std": 0.35683149099349976, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.35546875, "step": 20 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.35, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.55, "TT_Math/mode_1": 0.4666666666666667, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 300.5703125, "completion_length/mode_0": 328.75, "completion_length/mode_1": 272.390625, "epoch": 0.01910828025477707, "format_confidence": 0.5, "grad_norm": 1.1844820805839822, "grounded_proportion": 0.5, "kl": 0.0118408203125, "learning_rate": 9.818016378525933e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.3515625, "reward_std": 0.27275240421295166, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3515625, "step": 21 }, { "TT_Chart/mode_0": 0.20833333333333334, "TT_Chart/mode_1": 0.20833333333333334, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.45, "TT_Math/mode_1": 0.45, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.4166666666666667, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 331.59765625, "completion_length/mode_0": 350.2890625, "completion_length/mode_1": 312.90625, "epoch": 0.020018198362147407, "format_confidence": 0.5, "grad_norm": 0.8541575651698342, "grounded_proportion": 0.5, "kl": 0.010986328125, "learning_rate": 9.80891719745223e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.30078125, "reward_std": 0.2321278154850006, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.30078125, "step": 22 }, { "TT_Chart/mode_0": 0.5833333333333334, "TT_Chart/mode_1": 0.4166666666666667, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.48863636363636365, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.6666666666666666, "TT_Science/mode_1": 0.5, "completion_length": 387.9765625, "completion_length/mode_0": 414.9453125, "completion_length/mode_1": 361.0078125, "epoch": 0.020928116469517744, "format_confidence": 0.5, "grad_norm": 1.5808055185099863, "grounded_proportion": 0.5, "kl": 0.006439208984375, "learning_rate": 9.799818016378525e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.46484375, "reward_std": 0.32852408289909363, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.46484375, "step": 23 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.453125, "TT_Math/mode_1": 0.515625, "TT_OCR/mode_0": 0.5625, "TT_OCR/mode_1": 0.6875, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 331.4140625, "completion_length/mode_0": 352.4453125, "completion_length/mode_1": 310.3828125, "epoch": 0.02183803457688808, "format_confidence": 0.5, "grad_norm": 0.8593245043140431, "grounded_proportion": 0.5, "kl": 0.01055908203125, "learning_rate": 9.790718835304822e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.24446570873260498, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3671875, "step": 24 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.3125, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.484375, "TT_OCR/mode_0": 0.5625, "TT_OCR/mode_1": 0.6875, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.1875, "completion_length": 346.8046875, "completion_length/mode_0": 340.640625, "completion_length/mode_1": 352.96875, "epoch": 0.022747952684258416, "format_confidence": 0.5, "grad_norm": 0.9872564452072786, "grounded_proportion": 0.5, "kl": 0.00830078125, "learning_rate": 9.78161965423112e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0078125, "reward": 1.3359375, "reward_std": 0.3047879636287689, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.34375, "step": 25 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.6666666666666666, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4722222222222222, "TT_Math/mode_1": 0.3611111111111111, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.3125, "completion_length": 370.4453125, "completion_length/mode_0": 427.0, "completion_length/mode_1": 313.890625, "epoch": 0.023657870791628753, "format_confidence": 0.5, "grad_norm": 1.0075122997992207, "grounded_proportion": 0.5, "kl": 0.0067138671875, "learning_rate": 9.772520473157414e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.34375, "reward_std": 0.301014244556427, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.34765625, "step": 26 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.453125, "TT_Math/mode_1": 0.46875, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.05, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 331.84375, "completion_length/mode_0": 353.1015625, "completion_length/mode_1": 310.5859375, "epoch": 0.02456778889899909, "format_confidence": 0.5, "grad_norm": 0.9025821168695539, "grounded_proportion": 0.5, "kl": 0.0146484375, "learning_rate": 9.763421292083713e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.28125, "reward_std": 0.2187202274799347, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.28125, "step": 27 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.3125, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.6666666666666666, "TT_Math/mode_0": 0.3333333333333333, "TT_Math/mode_1": 0.2777777777777778, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 390.828125, "completion_length/mode_0": 404.609375, "completion_length/mode_1": 377.046875, "epoch": 0.025477707006369428, "format_confidence": 0.5, "grad_norm": 1.1882479091934361, "grounded_proportion": 0.5, "kl": 0.021728515625, "learning_rate": 9.75432211101001e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.29296875, "reward_std": 0.25501734018325806, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.29296875, "step": 28 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.16666666666666666, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.125, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5138888888888888, "TT_Math/mode_1": 0.4583333333333333, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 455.8359375, "completion_length/mode_0": 462.328125, "completion_length/mode_1": 449.34375, "epoch": 0.026387625113739762, "format_confidence": 0.5, "grad_norm": 0.6446278395164787, "grounded_proportion": 0.5, "kl": 0.005828857421875, "learning_rate": 9.745222929936306e-07, "loss": 0.0002, "over_lengthy_sequences": 0.0078125, "reward": 1.3125, "reward_std": 0.22071683406829834, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.3203125, "step": 29 }, { "TT_Chart/mode_0": 0.4166666666666667, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.125, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.4090909090909091, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.125, "completion_length": 432.3046875, "completion_length/mode_0": 476.96875, "completion_length/mode_1": 387.640625, "epoch": 0.0272975432211101, "format_confidence": 0.5, "grad_norm": 0.7387045634169115, "grounded_proportion": 0.5, "kl": 0.01116943359375, "learning_rate": 9.736123748862602e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.3345615267753601, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.38671875, "step": 30 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.125, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5125, "TT_Math/mode_1": 0.525, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.75, "completion_length": 387.2578125, "completion_length/mode_0": 424.34375, "completion_length/mode_1": 350.171875, "epoch": 0.028207461328480437, "format_confidence": 0.5, "grad_norm": 0.9075215710759794, "grounded_proportion": 0.5, "kl": 0.0091552734375, "learning_rate": 9.727024567788898e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.40234375, "reward_std": 0.2678895890712738, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40234375, "step": 31 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.16666666666666666, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5681818181818182, "TT_Math/mode_1": 0.5227272727272727, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.0, "completion_length": 367.25, "completion_length/mode_0": 399.828125, "completion_length/mode_1": 334.671875, "epoch": 0.029117379435850774, "format_confidence": 0.5, "grad_norm": 0.7678387402875528, "grounded_proportion": 0.5, "kl": 0.01202392578125, "learning_rate": 9.717925386715195e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.42578125, "reward_std": 0.284709632396698, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.42578125, "step": 32 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.42391304347826086, "TT_Math/mode_1": 0.5434782608695652, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.0, "completion_length": 412.55859375, "completion_length/mode_0": 444.4921875, "completion_length/mode_1": 380.625, "epoch": 0.03002729754322111, "format_confidence": 0.5, "grad_norm": 1.076115993141789, "grounded_proportion": 0.5, "kl": 0.01397705078125, "learning_rate": 9.70882620564149e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.304455041885376, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 33 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.3375, "TT_Math/mode_1": 0.4, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 358.66015625, "completion_length/mode_0": 385.1640625, "completion_length/mode_1": 332.15625, "epoch": 0.030937215650591446, "format_confidence": 0.5, "grad_norm": 1.2114231515737024, "grounded_proportion": 0.5, "kl": 0.0238037109375, "learning_rate": 9.69972702456779e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.3359375, "reward_std": 0.21515312790870667, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3359375, "step": 34 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.08333333333333333, "TT_Math/mode_0": 0.5694444444444444, "TT_Math/mode_1": 0.4861111111111111, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 339.53125, "completion_length/mode_0": 356.71875, "completion_length/mode_1": 322.34375, "epoch": 0.03184713375796178, "format_confidence": 0.5, "grad_norm": 0.9974240246736068, "grounded_proportion": 0.5, "kl": 0.026123046875, "learning_rate": 9.690627843494086e-07, "loss": 0.001, "over_lengthy_sequences": 0.00390625, "reward": 1.3359375, "reward_std": 0.28288590908050537, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.33984375, "step": 35 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.05, "TT_Detection/mode_1": 0.55, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.6333333333333333, "TT_Math/mode_1": 0.4166666666666667, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.25, "completion_length": 334.58203125, "completion_length/mode_0": 380.921875, "completion_length/mode_1": 288.2421875, "epoch": 0.03275705186533212, "format_confidence": 0.5, "grad_norm": 1.6406753400760954, "grounded_proportion": 0.5, "kl": 0.03759765625, "learning_rate": 9.681528662420382e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.31667181849479675, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.38671875, "step": 36 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5543478260869565, "TT_Math/mode_1": 0.5652173913043478, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.08333333333333333, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 348.41015625, "completion_length/mode_0": 370.5234375, "completion_length/mode_1": 326.296875, "epoch": 0.03366696997270246, "format_confidence": 0.5, "grad_norm": 0.6026535973402165, "grounded_proportion": 0.5, "kl": 0.0167236328125, "learning_rate": 9.672429481346678e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.4453125, "reward_std": 0.24579495191574097, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4453125, "step": 37 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4875, "TT_Math/mode_1": 0.6, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.0, "completion_length": 325.0859375, "completion_length/mode_0": 345.7109375, "completion_length/mode_1": 304.4609375, "epoch": 0.034576888080072796, "format_confidence": 0.5, "grad_norm": 1.7996536274917565, "grounded_proportion": 0.5, "kl": 0.039794921875, "learning_rate": 9.663330300272975e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.2690715491771698, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 38 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.05, "TT_Grounding/mode_1": 0.2, "TT_Math/mode_0": 0.5277777777777778, "TT_Math/mode_1": 0.4305555555555556, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.0, "completion_length": 337.09375, "completion_length/mode_0": 384.3984375, "completion_length/mode_1": 289.7890625, "epoch": 0.03548680618744313, "format_confidence": 0.5, "grad_norm": 1.8348507814517516, "grounded_proportion": 0.5, "kl": 0.051513671875, "learning_rate": 9.65423111919927e-07, "loss": 0.0021, "over_lengthy_sequences": 0.00390625, "reward": 1.375, "reward_std": 0.29592859745025635, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.37890625, "step": 39 }, { "TT_Chart/mode_0": 0.4166666666666667, "TT_Chart/mode_1": 0.3333333333333333, "TT_Counting/mode_0": 0.4, "TT_Counting/mode_1": 0.2, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.05, "TT_Grounding/mode_1": 0.2, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.375, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.0, "completion_length": 423.27734375, "completion_length/mode_0": 478.3359375, "completion_length/mode_1": 368.21875, "epoch": 0.036396724294813464, "format_confidence": 0.5, "grad_norm": 0.8417782233647189, "grounded_proportion": 0.5, "kl": 0.0096435546875, "learning_rate": 9.645131938125567e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.4140625, "reward_std": 0.3811083436012268, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4140625, "step": 40 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.5833333333333334, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.6333333333333333, "TT_Math/mode_1": 0.6, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.625, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 346.296875, "completion_length/mode_0": 368.3125, "completion_length/mode_1": 324.28125, "epoch": 0.0373066424021838, "format_confidence": 0.5, "grad_norm": 0.8971248354232603, "grounded_proportion": 0.5, "kl": 0.01708984375, "learning_rate": 9.636032757051866e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.2794685363769531, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 41 }, { "TT_Chart/mode_0": 0.39285714285714285, "TT_Chart/mode_1": 0.32142857142857145, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5192307692307693, "TT_Math/mode_1": 0.5192307692307693, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.4166666666666667, "completion_length": 327.87890625, "completion_length/mode_0": 374.3984375, "completion_length/mode_1": 281.359375, "epoch": 0.03821656050955414, "format_confidence": 0.5, "grad_norm": 1.969070754091683, "grounded_proportion": 0.5, "kl": 0.0157470703125, "learning_rate": 9.626933575978162e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.375, "reward_std": 0.2452620565891266, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.375, "step": 42 }, { "TT_Chart/mode_0": 0.2916666666666667, "TT_Chart/mode_1": 0.2916666666666667, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.6666666666666666, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5833333333333334, "TT_Math/mode_1": 0.5208333333333334, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.75, "completion_length": 279.71875, "completion_length/mode_0": 307.2421875, "completion_length/mode_1": 252.1953125, "epoch": 0.039126478616924476, "format_confidence": 0.5, "grad_norm": 3.84523994130464, "grounded_proportion": 0.5, "kl": 0.0184326171875, "learning_rate": 9.617834394904458e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.390625, "reward_std": 0.2264637053012848, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.390625, "step": 43 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.5588235294117647, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.375, "completion_length": 324.25390625, "completion_length/mode_0": 361.8515625, "completion_length/mode_1": 286.65625, "epoch": 0.040036396724294813, "format_confidence": 0.5, "grad_norm": 0.9447344472383061, "grounded_proportion": 0.5, "kl": 0.01422119140625, "learning_rate": 9.608735213830755e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.39453125, "reward_std": 0.29340648651123047, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.39453125, "step": 44 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.875, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4473684210526316, "TT_Math/mode_1": 0.39473684210526316, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 1.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.25, "completion_length": 337.859375, "completion_length/mode_0": 363.390625, "completion_length/mode_1": 312.328125, "epoch": 0.04094631483166515, "format_confidence": 0.5, "grad_norm": 1.4320135714153, "grounded_proportion": 0.5, "kl": 0.013671875, "learning_rate": 9.599636032757051e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.375, "reward_std": 0.31154942512512207, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.37890625, "step": 45 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5625, "TT_Counting/mode_1": 0.4375, "TT_Detection/mode_0": 1.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.3815789473684211, "TT_Math/mode_1": 0.2894736842105263, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 380.5, "completion_length/mode_0": 418.28125, "completion_length/mode_1": 342.71875, "epoch": 0.04185623293903549, "format_confidence": 0.5, "grad_norm": 0.7559008917052512, "grounded_proportion": 0.5, "kl": 0.012939453125, "learning_rate": 9.590536851683348e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.37890625, "reward_std": 0.23144766688346863, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37890625, "step": 46 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5588235294117647, "TT_Math/mode_1": 0.45588235294117646, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.2, "TT_Science/mode_1": 0.2, "completion_length": 399.77734375, "completion_length/mode_0": 433.140625, "completion_length/mode_1": 366.4140625, "epoch": 0.042766151046405826, "format_confidence": 0.5, "grad_norm": 1.0624054559364031, "grounded_proportion": 0.5, "kl": 0.01416015625, "learning_rate": 9.581437670609644e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.4140625, "reward_std": 0.35639268159866333, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4140625, "step": 47 }, { "TT_Chart/mode_0": 0.875, "TT_Chart/mode_1": 0.875, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.46875, "TT_OCR/mode_0": 0.3333333333333333, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.4166666666666667, "completion_length": 349.78125, "completion_length/mode_0": 375.375, "completion_length/mode_1": 324.1875, "epoch": 0.04367606915377616, "format_confidence": 0.5, "grad_norm": 0.6132246625343835, "grounded_proportion": 0.5, "kl": 0.011474609375, "learning_rate": 9.572338489535942e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.2500086724758148, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.38671875, "step": 48 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.875, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.39473684210526316, "TT_Math/mode_1": 0.4868421052631579, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.4166666666666667, "completion_length": 357.80078125, "completion_length/mode_0": 382.140625, "completion_length/mode_1": 333.4609375, "epoch": 0.044585987261146494, "format_confidence": 0.5, "grad_norm": 1.3534089418671387, "grounded_proportion": 0.5, "kl": 0.0108642578125, "learning_rate": 9.563239308462239e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.3424571454524994, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 49 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4722222222222222, "TT_Math/mode_1": 0.4305555555555556, "TT_OCR/mode_0": 0.08333333333333333, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.375, "completion_length": 348.2421875, "completion_length/mode_0": 378.8203125, "completion_length/mode_1": 317.6640625, "epoch": 0.04549590536851683, "format_confidence": 0.5, "grad_norm": 1.040116212408755, "grounded_proportion": 0.5, "kl": 0.0142822265625, "learning_rate": 9.554140127388535e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.34375, "reward_std": 0.3203405737876892, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.34765625, "step": 50 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.16666666666666666, "TT_Counting/mode_1": 0.5833333333333334, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.40789473684210525, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.75, "completion_length": 356.33203125, "completion_length/mode_0": 390.25, "completion_length/mode_1": 322.4140625, "epoch": 0.04640582347588717, "format_confidence": 0.5, "grad_norm": 2.6529744307875993, "grounded_proportion": 0.5, "kl": 0.01220703125, "learning_rate": 9.545040946314831e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.40234375, "reward_std": 0.3068116307258606, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40234375, "step": 51 }, { "TT_Chart/mode_0": 0.3125, "TT_Chart/mode_1": 0.4375, "TT_Counting/mode_0": 0.5833333333333334, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.125, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.671875, "TT_Math/mode_1": 0.59375, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.75, "completion_length": 329.06640625, "completion_length/mode_0": 356.7421875, "completion_length/mode_1": 301.390625, "epoch": 0.047315741583257506, "format_confidence": 0.5, "grad_norm": 0.9611109881006651, "grounded_proportion": 0.5, "kl": 0.013916015625, "learning_rate": 9.535941765241128e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.48828125, "reward_std": 0.38452082872390747, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.48828125, "step": 52 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.1875, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.578125, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.125, "completion_length": 327.03125, "completion_length/mode_0": 346.4921875, "completion_length/mode_1": 307.5703125, "epoch": 0.048225659690627844, "format_confidence": 0.5, "grad_norm": 0.797769852763845, "grounded_proportion": 0.5, "kl": 0.012939453125, "learning_rate": 9.526842584167425e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.34375, "reward_std": 0.21713145077228546, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.34375, "step": 53 }, { "TT_Chart/mode_0": 0.6, "TT_Chart/mode_1": 0.45, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.578125, "TT_Math/mode_1": 0.578125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.4166666666666667, "TT_Science/mode_1": 0.5833333333333334, "completion_length": 292.44140625, "completion_length/mode_0": 315.625, "completion_length/mode_1": 269.2578125, "epoch": 0.04913557779799818, "format_confidence": 0.5, "grad_norm": 1.549225397699757, "grounded_proportion": 0.5, "kl": 0.01287841796875, "learning_rate": 9.517743403093721e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.46875, "reward_std": 0.20779038965702057, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.46875, "step": 54 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.6052631578947368, "TT_Math/mode_1": 0.631578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.25, "completion_length": 336.27734375, "completion_length/mode_0": 361.40625, "completion_length/mode_1": 311.1484375, "epoch": 0.05004549590536852, "format_confidence": 0.5, "grad_norm": 1.0145037617177997, "grounded_proportion": 0.5, "kl": 0.01055908203125, "learning_rate": 9.508644222020018e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.44921875, "reward_std": 0.27354732155799866, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.44921875, "step": 55 }, { "TT_Chart/mode_0": 0.3888888888888889, "TT_Chart/mode_1": 0.3888888888888889, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.16666666666666666, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.7115384615384616, "TT_Math/mode_1": 0.5961538461538461, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.25, "completion_length": 319.97265625, "completion_length/mode_0": 339.140625, "completion_length/mode_1": 300.8046875, "epoch": 0.050955414012738856, "format_confidence": 0.5, "grad_norm": 1.126724757554702, "grounded_proportion": 0.5, "kl": 0.01165771484375, "learning_rate": 9.499545040946314e-07, "loss": 0.0005, "over_lengthy_sequences": 0.00390625, "reward": 1.44140625, "reward_std": 0.27130943536758423, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.4453125, "step": 56 }, { "TT_Chart/mode_0": 0.3333333333333333, "TT_Chart/mode_1": 0.3333333333333333, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.875, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4264705882352941, "TT_Math/mode_1": 0.4852941176470588, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.25, "completion_length": 377.96875, "completion_length/mode_0": 419.2265625, "completion_length/mode_1": 336.7109375, "epoch": 0.051865332120109194, "format_confidence": 0.5, "grad_norm": 0.7761878682566107, "grounded_proportion": 0.5, "kl": 0.012939453125, "learning_rate": 9.490445859872611e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.33984375, "reward_std": 0.19791889190673828, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.33984375, "step": 57 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5131578947368421, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 354.40625, "completion_length/mode_0": 378.8828125, "completion_length/mode_1": 329.9296875, "epoch": 0.052775250227479524, "format_confidence": 0.5, "grad_norm": 1.0626500338136928, "grounded_proportion": 0.5, "kl": 0.03759765625, "learning_rate": 9.481346678798907e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.37109375, "reward_std": 0.25118574500083923, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37109375, "step": 58 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.46, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.25, "completion_length": 438.22265625, "completion_length/mode_0": 464.8359375, "completion_length/mode_1": 411.609375, "epoch": 0.05368516833484986, "format_confidence": 0.5, "grad_norm": 1.2595171994008831, "grounded_proportion": 0.5, "kl": 0.00750732421875, "learning_rate": 9.472247497725204e-07, "loss": 0.0003, "over_lengthy_sequences": 0.0, "reward": 1.4453125, "reward_std": 0.26565414667129517, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4453125, "step": 59 }, { "TT_Chart/mode_0": 0.21428571428571427, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.40625, "TT_Math/mode_1": 0.265625, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 421.61328125, "completion_length/mode_0": 453.5625, "completion_length/mode_1": 389.6640625, "epoch": 0.0545950864422202, "format_confidence": 0.5, "grad_norm": 0.7843446949263764, "grounded_proportion": 0.5, "kl": 0.0103759765625, "learning_rate": 9.463148316651502e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.2734375, "reward_std": 0.24078628420829773, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.2734375, "step": 60 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5568181818181818, "TT_Math/mode_1": 0.5568181818181818, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 374.40625, "completion_length/mode_0": 416.109375, "completion_length/mode_1": 332.703125, "epoch": 0.055505004549590536, "format_confidence": 0.5, "grad_norm": 0.49588424035326556, "grounded_proportion": 0.5, "kl": 0.00921630859375, "learning_rate": 9.454049135577798e-07, "loss": 0.0004, "over_lengthy_sequences": 0.0, "reward": 1.4140625, "reward_std": 0.20528410375118256, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.41796875, "step": 61 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.3125, "TT_Math/mode_0": 0.4166666666666667, "TT_Math/mode_1": 0.5694444444444444, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 370.55078125, "completion_length/mode_0": 399.0546875, "completion_length/mode_1": 342.046875, "epoch": 0.056414922656960874, "format_confidence": 0.5, "grad_norm": 0.9866700336073141, "grounded_proportion": 0.5, "kl": 0.01806640625, "learning_rate": 9.444949954504094e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.44921875, "reward_std": 0.26063913106918335, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.44921875, "step": 62 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.53125, "TT_Counting/mode_1": 0.4375, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.6166666666666667, "TT_Math/mode_1": 0.7, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 312.875, "completion_length/mode_0": 329.6640625, "completion_length/mode_1": 296.0859375, "epoch": 0.05732484076433121, "format_confidence": 0.5, "grad_norm": 0.826234605264805, "grounded_proportion": 0.5, "kl": 0.0211181640625, "learning_rate": 9.435850773430391e-07, "loss": 0.0008, "over_lengthy_sequences": 0.0, "reward": 1.48828125, "reward_std": 0.2982718050479889, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.4921875, "step": 63 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.4642857142857143, "TT_Counting/mode_1": 0.42857142857142855, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.5588235294117647, "TT_Math/mode_1": 0.4852941176470588, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 320.4296875, "completion_length/mode_0": 344.984375, "completion_length/mode_1": 295.875, "epoch": 0.05823475887170155, "format_confidence": 0.5, "grad_norm": 1.332396950969122, "grounded_proportion": 0.5, "kl": 0.017822265625, "learning_rate": 9.426751592356688e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.4375, "reward_std": 0.30236050486564636, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4375, "step": 64 }, { "TT_Chart/mode_0": 0.5833333333333334, "TT_Chart/mode_1": 0.4166666666666667, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.7708333333333334, "TT_Math/mode_1": 0.7708333333333334, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.35, "TT_Science/mode_1": 0.4, "completion_length": 265.51171875, "completion_length/mode_0": 294.296875, "completion_length/mode_1": 236.7265625, "epoch": 0.059144676979071886, "format_confidence": 0.5, "grad_norm": 1.0706428268753283, "grounded_proportion": 0.5, "kl": 0.0172119140625, "learning_rate": 9.417652411282983e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.53515625, "reward_std": 0.2715778052806854, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.53515625, "step": 65 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.35714285714285715, "TT_Math/mode_0": 0.625, "TT_Math/mode_1": 0.703125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.1875, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.35, "TT_Science/mode_1": 0.4, "completion_length": 337.359375, "completion_length/mode_0": 346.75, "completion_length/mode_1": 327.96875, "epoch": 0.06005459508644222, "format_confidence": 0.5, "grad_norm": 0.9882812019724813, "grounded_proportion": 0.5, "kl": 0.0286865234375, "learning_rate": 9.408553230209281e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.2574900984764099, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4296875, "step": 66 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.16666666666666666, "TT_Counting/mode_1": 0.16666666666666666, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.5227272727272727, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.5, "completion_length": 360.41796875, "completion_length/mode_0": 384.0, "completion_length/mode_1": 336.8359375, "epoch": 0.060964513193812554, "format_confidence": 0.5, "grad_norm": 1.081856623327895, "grounded_proportion": 0.5, "kl": 0.013671875, "learning_rate": 9.399454049135578e-07, "loss": 0.0005, "over_lengthy_sequences": 0.0, "reward": 1.3984375, "reward_std": 0.3071898818016052, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3984375, "step": 67 }, { "TT_Chart/mode_0": 0.3333333333333333, "TT_Chart/mode_1": 0.3333333333333333, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.36904761904761907, "TT_Math/mode_1": 0.4523809523809524, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.25, "completion_length": 382.390625, "completion_length/mode_0": 399.09375, "completion_length/mode_1": 365.6875, "epoch": 0.06187443130118289, "format_confidence": 0.5, "grad_norm": 1.0129574632580445, "grounded_proportion": 0.5, "kl": 0.0257568359375, "learning_rate": 9.390354868061873e-07, "loss": 0.001, "over_lengthy_sequences": 0.00390625, "reward": 1.36328125, "reward_std": 0.33184993267059326, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3671875, "step": 68 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5625, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.35714285714285715, "TT_Math/mode_1": 0.42857142857142855, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 315.05078125, "completion_length/mode_0": 341.9140625, "completion_length/mode_1": 288.1875, "epoch": 0.06278434940855324, "format_confidence": 0.5, "grad_norm": 0.6116970446341301, "grounded_proportion": 0.5, "kl": 0.031005859375, "learning_rate": 9.381255686988171e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.31640625, "reward_std": 0.25513356924057007, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.31640625, "step": 69 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.5833333333333334, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.6111111111111112, "TT_Math/mode_1": 0.6388888888888888, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 1.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.3, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 1.0, "completion_length": 325.84375, "completion_length/mode_0": 348.8671875, "completion_length/mode_1": 302.8203125, "epoch": 0.06369426751592357, "format_confidence": 0.5, "grad_norm": 1.393496150546713, "grounded_proportion": 0.5, "kl": 0.0267333984375, "learning_rate": 9.372156505914467e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.49609375, "reward_std": 0.3036562502384186, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.49609375, "step": 70 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5277777777777778, "TT_Math/mode_1": 0.5694444444444444, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 358.953125, "completion_length/mode_0": 377.46875, "completion_length/mode_1": 340.4375, "epoch": 0.0646041856232939, "format_confidence": 0.5, "grad_norm": 0.8157106114860397, "grounded_proportion": 0.5, "kl": 0.01556396484375, "learning_rate": 9.363057324840764e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.40625, "reward_std": 0.2968239188194275, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40625, "step": 71 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.3888888888888889, "TT_Math/mode_1": 0.37962962962962965, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 524.28515625, "completion_length/mode_0": 538.4765625, "completion_length/mode_1": 510.09375, "epoch": 0.06551410373066424, "format_confidence": 0.5, "grad_norm": 1.4368535247954564, "grounded_proportion": 0.5, "kl": 0.038330078125, "learning_rate": 9.35395814376706e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0078125, "reward": 1.328125, "reward_std": 0.3050992488861084, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.3359375, "step": 72 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.45, "TT_Counting/mode_1": 0.6, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.6052631578947368, "TT_Math/mode_1": 0.618421052631579, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 345.50390625, "completion_length/mode_0": 374.7734375, "completion_length/mode_1": 316.234375, "epoch": 0.06642402183803457, "format_confidence": 0.5, "grad_norm": 1.4581833815927654, "grounded_proportion": 0.5, "kl": 0.027587890625, "learning_rate": 9.344858962693357e-07, "loss": 0.0011, "over_lengthy_sequences": 0.00390625, "reward": 1.47265625, "reward_std": 0.3206025958061218, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.4765625, "step": 73 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6666666666666666, "TT_Math/mode_1": 0.5833333333333334, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5833333333333334, "TT_Science/mode_1": 0.4166666666666667, "completion_length": 336.33203125, "completion_length/mode_0": 374.1171875, "completion_length/mode_1": 298.546875, "epoch": 0.06733393994540492, "format_confidence": 0.5, "grad_norm": 1.9303402353521169, "grounded_proportion": 0.5, "kl": 0.0269775390625, "learning_rate": 9.335759781619655e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.51171875, "reward_std": 0.40742409229278564, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.51171875, "step": 74 }, { "TT_Chart/mode_0": 0.3, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0625, "TT_Grounding/mode_1": 0.5625, "TT_Math/mode_0": 0.44642857142857145, "TT_Math/mode_1": 0.5178571428571429, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 310.859375, "completion_length/mode_0": 345.890625, "completion_length/mode_1": 275.828125, "epoch": 0.06824385805277525, "format_confidence": 0.5, "grad_norm": 1.094986906392167, "grounded_proportion": 0.5, "kl": 0.0250244140625, "learning_rate": 9.32666060054595e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.32421875, "reward_std": 0.2394905686378479, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.32421875, "step": 75 }, { "TT_Chart/mode_0": 0.35, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.5125, "TT_Math/mode_1": 0.45, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 1.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 444.99609375, "completion_length/mode_0": 487.234375, "completion_length/mode_1": 402.7578125, "epoch": 0.06915377616014559, "format_confidence": 0.5, "grad_norm": 0.9456640910256001, "grounded_proportion": 0.5, "kl": 0.02783203125, "learning_rate": 9.317561419472247e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.32864031195640564, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 76 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.484375, "TT_Math/mode_1": 0.46875, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.375, "completion_length": 357.015625, "completion_length/mode_0": 384.8125, "completion_length/mode_1": 329.21875, "epoch": 0.07006369426751592, "format_confidence": 0.5, "grad_norm": 1.126552968002394, "grounded_proportion": 0.5, "kl": 0.01446533203125, "learning_rate": 9.308462238398544e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.3359375, "reward_std": 0.20832324028015137, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3359375, "step": 77 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.3125, "TT_Detection/mode_1": 0.3125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.39473684210526316, "TT_Math/mode_1": 0.39473684210526316, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.75, "completion_length": 365.06640625, "completion_length/mode_0": 388.1796875, "completion_length/mode_1": 341.953125, "epoch": 0.07097361237488627, "format_confidence": 0.5, "grad_norm": 1.8929118579408397, "grounded_proportion": 0.5, "kl": 0.021240234375, "learning_rate": 9.299363057324841e-07, "loss": 0.0009, "over_lengthy_sequences": 0.00390625, "reward": 1.36328125, "reward_std": 0.32180553674697876, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.375, "step": 78 }, { "TT_Chart/mode_0": 0.4166666666666667, "TT_Chart/mode_1": 0.4166666666666667, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.5441176470588235, "TT_Math/mode_1": 0.4264705882352941, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.25, "completion_length": 412.8359375, "completion_length/mode_0": 454.6640625, "completion_length/mode_1": 371.0078125, "epoch": 0.0718835304822566, "format_confidence": 0.5, "grad_norm": 2.3611466751955206, "grounded_proportion": 0.5, "kl": 0.0196533203125, "learning_rate": 9.290263876251136e-07, "loss": 0.0008, "over_lengthy_sequences": 0.00390625, "reward": 1.35546875, "reward_std": 0.2786721885204315, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.359375, "step": 79 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.8333333333333334, "TT_Math/mode_0": 0.703125, "TT_Math/mode_1": 0.546875, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.3333333333333333, "TT_Others/mode_1": 0.4166666666666667, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 324.98828125, "completion_length/mode_0": 360.6484375, "completion_length/mode_1": 289.328125, "epoch": 0.07279344858962693, "format_confidence": 0.5, "grad_norm": 1.423595772914213, "grounded_proportion": 0.5, "kl": 0.028076171875, "learning_rate": 9.281164695177434e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.4765625, "reward_std": 0.32864275574684143, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4765625, "step": 80 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.3333333333333333, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4583333333333333, "TT_Math/mode_1": 0.4861111111111111, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 421.86328125, "completion_length/mode_0": 444.5234375, "completion_length/mode_1": 399.203125, "epoch": 0.07370336669699727, "format_confidence": 0.5, "grad_norm": 0.9052028066723226, "grounded_proportion": 0.5, "kl": 0.0186767578125, "learning_rate": 9.272065514103731e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.37890625, "reward_std": 0.3306756913661957, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.390625, "step": 81 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0625, "TT_Detection/mode_1": 0.6875, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.55, "TT_Math/mode_1": 0.5125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.5, "completion_length": 383.95703125, "completion_length/mode_0": 410.4375, "completion_length/mode_1": 357.4765625, "epoch": 0.0746132848043676, "format_confidence": 0.5, "grad_norm": 1.1442493170852235, "grounded_proportion": 0.5, "kl": 0.030029296875, "learning_rate": 9.262966333030026e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.46875, "reward_std": 0.3109995126724243, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.47265625, "step": 82 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.125, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.4166666666666667, "TT_Math/mode_0": 0.525, "TT_Math/mode_1": 0.475, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.625, "completion_length": 355.09765625, "completion_length/mode_0": 388.8984375, "completion_length/mode_1": 321.296875, "epoch": 0.07552320291173795, "format_confidence": 0.5, "grad_norm": 1.1396443794919924, "grounded_proportion": 0.5, "kl": 0.01708984375, "learning_rate": 9.253867151956324e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.3350968360900879, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4296875, "step": 83 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.3, "TT_Counting/mode_1": 0.3, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.4375, "TT_Document/mode_0": 0.3333333333333333, "TT_Document/mode_1": 0.3333333333333333, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.4807692307692308, "TT_Math/mode_1": 0.5769230769230769, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 306.87890625, "completion_length/mode_0": 345.4296875, "completion_length/mode_1": 268.328125, "epoch": 0.07643312101910828, "format_confidence": 0.5, "grad_norm": 0.8312318484762555, "grounded_proportion": 0.5, "kl": 0.0380859375, "learning_rate": 9.24476797088262e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.36328125, "reward_std": 0.2795896828174591, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3671875, "step": 84 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.4270833333333333, "TT_Math/mode_1": 0.4479166666666667, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 418.26953125, "completion_length/mode_0": 462.9609375, "completion_length/mode_1": 373.578125, "epoch": 0.07734303912647862, "format_confidence": 0.5, "grad_norm": 0.7031346175738115, "grounded_proportion": 0.5, "kl": 0.0191650390625, "learning_rate": 9.235668789808917e-07, "loss": 0.0008, "over_lengthy_sequences": 0.0, "reward": 1.37890625, "reward_std": 0.2682702839374542, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37890625, "step": 85 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.3125, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.6710526315789473, "TT_Math/mode_1": 0.631578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.75, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.25, "completion_length": 314.35546875, "completion_length/mode_0": 347.1015625, "completion_length/mode_1": 281.609375, "epoch": 0.07825295723384895, "format_confidence": 0.5, "grad_norm": 1.1106438112336248, "grounded_proportion": 0.5, "kl": 0.033447265625, "learning_rate": 9.226569608735213e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.53125, "reward_std": 0.3708537220954895, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.53125, "step": 86 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.4659090909090909, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 459.609375, "completion_length/mode_0": 470.390625, "completion_length/mode_1": 448.828125, "epoch": 0.0791628753412193, "format_confidence": 0.5, "grad_norm": 0.9111319582454244, "grounded_proportion": 0.5, "kl": 0.0283203125, "learning_rate": 9.21747042766151e-07, "loss": 0.0011, "over_lengthy_sequences": 0.00390625, "reward": 1.37890625, "reward_std": 0.32089686393737793, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3828125, "step": 87 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.16666666666666666, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.45588235294117646, "TT_Math/mode_1": 0.35294117647058826, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3, "TT_Science/mode_1": 0.5, "completion_length": 288.15625, "completion_length/mode_0": 321.078125, "completion_length/mode_1": 255.234375, "epoch": 0.08007279344858963, "format_confidence": 0.5, "grad_norm": 0.9337639335763862, "grounded_proportion": 0.5, "kl": 0.0252685546875, "learning_rate": 9.208371246587808e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.33984375, "reward_std": 0.32207000255584717, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.33984375, "step": 88 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.16666666666666666, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.4444444444444444, "TT_Math/mode_1": 0.4305555555555556, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.0, "completion_length": 379.68359375, "completion_length/mode_0": 409.75, "completion_length/mode_1": 349.6171875, "epoch": 0.08098271155595996, "format_confidence": 0.5, "grad_norm": 0.9170168681283755, "grounded_proportion": 0.5, "kl": 0.04052734375, "learning_rate": 9.199272065514103e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.27734375, "reward_std": 0.2290886491537094, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.27734375, "step": 89 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.1875, "TT_Grounding/mode_1": 0.3125, "TT_Math/mode_0": 0.5625, "TT_Math/mode_1": 0.525, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 1.0, "completion_length": 332.9296875, "completion_length/mode_0": 359.171875, "completion_length/mode_1": 306.6875, "epoch": 0.0818926296633303, "format_confidence": 0.5, "grad_norm": 1.2383174113880757, "grounded_proportion": 0.5, "kl": 0.0498046875, "learning_rate": 9.1901728844404e-07, "loss": 0.002, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.29393690824508667, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 90 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.3125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4625, "TT_Math/mode_1": 0.4, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 1.0, "completion_length": 342.94921875, "completion_length/mode_0": 382.40625, "completion_length/mode_1": 303.4921875, "epoch": 0.08280254777070063, "format_confidence": 0.5, "grad_norm": 1.0193811299262545, "grounded_proportion": 0.5, "kl": 0.03173828125, "learning_rate": 9.181073703366697e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.3356223702430725, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 91 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5657894736842105, "TT_Math/mode_1": 0.4473684210526316, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.1875, "completion_length": 314.7265625, "completion_length/mode_0": 340.65625, "completion_length/mode_1": 288.796875, "epoch": 0.08371246587807098, "format_confidence": 0.5, "grad_norm": 0.9300317397982756, "grounded_proportion": 0.5, "kl": 0.02294921875, "learning_rate": 9.171974522292994e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.3043053150177002, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 92 }, { "TT_Chart/mode_0": 0.4375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.16666666666666666, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.359375, "TT_Math/mode_1": 0.3125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 1.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.1875, "completion_length": 352.9375, "completion_length/mode_0": 383.890625, "completion_length/mode_1": 321.984375, "epoch": 0.08462238398544131, "format_confidence": 0.5, "grad_norm": 1.3154100577214958, "grounded_proportion": 0.5, "kl": 0.0419921875, "learning_rate": 9.162875341219289e-07, "loss": 0.0017, "over_lengthy_sequences": 0.0, "reward": 1.328125, "reward_std": 0.29681897163391113, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.328125, "step": 93 }, { "TT_Chart/mode_0": 0.4375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.453125, "TT_Math/mode_1": 0.328125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 338.6953125, "completion_length/mode_0": 377.4609375, "completion_length/mode_1": 299.9296875, "epoch": 0.08553230209281165, "format_confidence": 0.5, "grad_norm": 0.8794839213028697, "grounded_proportion": 0.5, "kl": 0.0306396484375, "learning_rate": 9.153776160145587e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.27734375, "reward_std": 0.2864776849746704, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.28125, "step": 94 }, { "TT_Chart/mode_0": 0.4375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.675, "TT_Math/mode_1": 0.6125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.4166666666666667, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 403.01171875, "completion_length/mode_0": 441.59375, "completion_length/mode_1": 364.4296875, "epoch": 0.08644222020018198, "format_confidence": 0.5, "grad_norm": 0.7977211310971815, "grounded_proportion": 0.5, "kl": 0.0137939453125, "learning_rate": 9.144676979071884e-07, "loss": 0.0006, "over_lengthy_sequences": 0.0, "reward": 1.52734375, "reward_std": 0.30707117915153503, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.52734375, "step": 95 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.875, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.35294117647058826, "TT_Math/mode_1": 0.3088235294117647, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.4166666666666667, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 380.80078125, "completion_length/mode_0": 407.078125, "completion_length/mode_1": 354.5234375, "epoch": 0.08735213830755233, "format_confidence": 0.5, "grad_norm": 0.893912421847757, "grounded_proportion": 0.5, "kl": 0.039306640625, "learning_rate": 9.135577797998179e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.3125, "reward_std": 0.23303402960300446, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3125, "step": 96 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.875, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6309523809523809, "TT_Math/mode_1": 0.5476190476190477, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.5, "completion_length": 349.80859375, "completion_length/mode_0": 384.3671875, "completion_length/mode_1": 315.25, "epoch": 0.08826205641492266, "format_confidence": 0.5, "grad_norm": 1.488221637431153, "grounded_proportion": 0.5, "kl": 0.047607421875, "learning_rate": 9.126478616924477e-07, "loss": 0.0019, "over_lengthy_sequences": 0.0, "reward": 1.48046875, "reward_std": 0.33312100172042847, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.48046875, "step": 97 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.875, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.475, "TT_Math/mode_1": 0.5125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 386.19140625, "completion_length/mode_0": 423.7421875, "completion_length/mode_1": 348.640625, "epoch": 0.08917197452229299, "format_confidence": 0.5, "grad_norm": 0.7423205892030071, "grounded_proportion": 0.5, "kl": 0.0419921875, "learning_rate": 9.117379435850773e-07, "loss": 0.0017, "over_lengthy_sequences": 0.00390625, "reward": 1.4375, "reward_std": 0.2985200881958008, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.44140625, "step": 98 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.4166666666666667, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.4375, "TT_Detection/mode_0": 0.375, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.08333333333333333, "TT_Grounding/mode_1": 0.08333333333333333, "TT_Math/mode_0": 0.578125, "TT_Math/mode_1": 0.546875, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 307.75, "completion_length/mode_0": 326.390625, "completion_length/mode_1": 289.109375, "epoch": 0.09008189262966333, "format_confidence": 0.5, "grad_norm": 1.089867159322649, "grounded_proportion": 0.5, "kl": 0.05029296875, "learning_rate": 9.108280254777069e-07, "loss": 0.002, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.2457924783229828, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 99 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0625, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.08333333333333333, "TT_Grounding/mode_1": 0.08333333333333333, "TT_Math/mode_0": 0.3333333333333333, "TT_Math/mode_1": 0.2857142857142857, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.25, "completion_length": 390.5234375, "completion_length/mode_0": 416.84375, "completion_length/mode_1": 364.203125, "epoch": 0.09099181073703366, "format_confidence": 0.5, "grad_norm": 0.7432661418713463, "grounded_proportion": 0.5, "kl": 0.033447265625, "learning_rate": 9.099181073703366e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.296875, "reward_std": 0.3486403822898865, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.3046875, "step": 100 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5138888888888888, "TT_Math/mode_1": 0.4722222222222222, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.25, "completion_length": 417.96875, "completion_length/mode_0": 456.1015625, "completion_length/mode_1": 379.8359375, "epoch": 0.09190172884440401, "format_confidence": 0.5, "grad_norm": 0.8403418247569083, "grounded_proportion": 0.5, "kl": 0.017822265625, "learning_rate": 9.090081892629663e-07, "loss": 0.0007, "over_lengthy_sequences": 0.0, "reward": 1.37890625, "reward_std": 0.31049102544784546, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37890625, "step": 101 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.1875, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.4852941176470588, "TT_Math/mode_1": 0.38235294117647056, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 1.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.1875, "completion_length": 357.59765625, "completion_length/mode_0": 374.7421875, "completion_length/mode_1": 340.453125, "epoch": 0.09281164695177434, "format_confidence": 0.5, "grad_norm": 1.0298049787611796, "grounded_proportion": 0.5, "kl": 0.04736328125, "learning_rate": 9.08098271155596e-07, "loss": 0.0019, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.30194875597953796, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3671875, "step": 102 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.45, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.6333333333333333, "TT_Math/mode_1": 0.55, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 1.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 289.25, "completion_length/mode_0": 316.984375, "completion_length/mode_1": 261.515625, "epoch": 0.09372156505914468, "format_confidence": 0.5, "grad_norm": 1.322134373732465, "grounded_proportion": 0.5, "kl": 0.03857421875, "learning_rate": 9.071883530482256e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.4375, "reward_std": 0.3878220021724701, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4375, "step": 103 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.42105263157894735, "TT_Math/mode_1": 0.4605263157894737, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.0, "completion_length": 394.15625, "completion_length/mode_0": 421.609375, "completion_length/mode_1": 366.703125, "epoch": 0.09463148316651501, "format_confidence": 0.5, "grad_norm": 1.138479077746897, "grounded_proportion": 0.5, "kl": 0.02734375, "learning_rate": 9.062784349408553e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.33984375, "reward_std": 0.28182753920555115, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.33984375, "step": 104 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.375, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.6470588235294118, "TT_Math/mode_1": 0.6176470588235294, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.0, "completion_length": 359.4453125, "completion_length/mode_0": 369.296875, "completion_length/mode_1": 349.59375, "epoch": 0.09554140127388536, "format_confidence": 0.5, "grad_norm": 0.8292575747476529, "grounded_proportion": 0.5, "kl": 0.029052734375, "learning_rate": 9.05368516833485e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.52734375, "reward_std": 0.22620166838169098, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.53125, "step": 105 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.08333333333333333, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.47368421052631576, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.4166666666666667, "TT_Others/mode_1": 0.6666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.0, "completion_length": 311.8515625, "completion_length/mode_0": 330.546875, "completion_length/mode_1": 293.15625, "epoch": 0.09645131938125569, "format_confidence": 0.5, "grad_norm": 0.7435578662413549, "grounded_proportion": 0.5, "kl": 0.0233154296875, "learning_rate": 9.044585987261146e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.453125, "reward_std": 0.3033941984176636, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.453125, "step": 106 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.625, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0625, "TT_Math/mode_0": 0.39705882352941174, "TT_Math/mode_1": 0.4264705882352941, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.4166666666666667, "TT_Others/mode_1": 0.6666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 379.23046875, "completion_length/mode_0": 410.96875, "completion_length/mode_1": 347.4921875, "epoch": 0.09736123748862602, "format_confidence": 0.5, "grad_norm": 1.95451747528783, "grounded_proportion": 0.5, "kl": 0.031494140625, "learning_rate": 9.035486806187442e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.2845958471298218, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3671875, "step": 107 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.35, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5394736842105263, "TT_Math/mode_1": 0.5394736842105263, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 430.86328125, "completion_length/mode_0": 444.203125, "completion_length/mode_1": 417.5234375, "epoch": 0.09827115559599636, "format_confidence": 0.5, "grad_norm": 0.6617312507830653, "grounded_proportion": 0.5, "kl": 0.0247802734375, "learning_rate": 9.02638762511374e-07, "loss": 0.001, "over_lengthy_sequences": 0.01171875, "reward": 1.40234375, "reward_std": 0.2459551990032196, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.4140625, "step": 108 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 1.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.55, "TT_Math/mode_1": 0.43333333333333335, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.4166666666666667, "completion_length": 364.859375, "completion_length/mode_0": 396.6875, "completion_length/mode_1": 333.03125, "epoch": 0.09918107370336669, "format_confidence": 0.5, "grad_norm": 1.1162695339468396, "grounded_proportion": 0.5, "kl": 0.0306396484375, "learning_rate": 9.017288444040037e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.306543231010437, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.38671875, "step": 109 }, { "TT_Chart/mode_0": 0.6, "TT_Chart/mode_1": 0.55, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.45, "TT_Detection/mode_0": 0.3333333333333333, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.4117647058823529, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 350.5859375, "completion_length/mode_0": 396.2421875, "completion_length/mode_1": 304.9296875, "epoch": 0.10009099181073704, "format_confidence": 0.5, "grad_norm": 1.4082884314141118, "grounded_proportion": 0.5, "kl": 0.0264892578125, "learning_rate": 9.008189262966332e-07, "loss": 0.0011, "over_lengthy_sequences": 0.00390625, "reward": 1.421875, "reward_std": 0.372009813785553, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.42578125, "step": 110 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.3333333333333333, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.5263157894736842, "TT_Math/mode_1": 0.5263157894736842, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 272.01953125, "completion_length/mode_0": 287.265625, "completion_length/mode_1": 256.7734375, "epoch": 0.10100090991810737, "format_confidence": 0.5, "grad_norm": 14.658431508796246, "grounded_proportion": 0.5, "kl": 0.034423828125, "learning_rate": 8.99909008189263e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.46875, "reward_std": 0.29970598220825195, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.46875, "step": 111 }, { "TT_Chart/mode_0": 0.16666666666666666, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.6052631578947368, "TT_Math/mode_1": 0.5526315789473685, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.5, "completion_length": 357.01171875, "completion_length/mode_0": 377.296875, "completion_length/mode_1": 336.7265625, "epoch": 0.10191082802547771, "format_confidence": 0.5, "grad_norm": 0.8195338040008043, "grounded_proportion": 0.5, "kl": 0.030517578125, "learning_rate": 8.989990900818926e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.305894136428833, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 112 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.625, "TT_Counting/mode_0": 0.5833333333333334, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.4852941176470588, "TT_Math/mode_1": 0.4852941176470588, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.4375, "completion_length": 337.96875, "completion_length/mode_0": 374.4453125, "completion_length/mode_1": 301.4921875, "epoch": 0.10282074613284804, "format_confidence": 0.5, "grad_norm": 0.8731230125231162, "grounded_proportion": 0.5, "kl": 0.026123046875, "learning_rate": 8.980891719745222e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.453125, "reward_std": 0.246971994638443, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.453125, "step": 113 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.3125, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.9166666666666666, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6071428571428571, "TT_Math/mode_1": 0.5357142857142857, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.0625, "TT_Puzzle/mode_0": 1.0, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.0, "completion_length": 338.16015625, "completion_length/mode_0": 361.3359375, "completion_length/mode_1": 314.984375, "epoch": 0.10373066424021839, "format_confidence": 0.5, "grad_norm": 0.7811590590595154, "grounded_proportion": 0.5, "kl": 0.027587890625, "learning_rate": 8.971792538671519e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.23079612851142883, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 114 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.3382352941176471, "TT_Math/mode_1": 0.39705882352941174, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.125, "completion_length": 328.72265625, "completion_length/mode_0": 343.1015625, "completion_length/mode_1": 314.34375, "epoch": 0.10464058234758872, "format_confidence": 0.5, "grad_norm": 0.8326183125105682, "grounded_proportion": 0.5, "kl": 0.037353515625, "learning_rate": 8.962693357597816e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.3203125, "reward_std": 0.2396092265844345, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3203125, "step": 115 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.8333333333333334, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.5384615384615384, "TT_Math/mode_1": 0.5769230769230769, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.16666666666666666, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.40625, "TT_Science/mode_1": 0.21875, "completion_length": 325.96484375, "completion_length/mode_0": 346.40625, "completion_length/mode_1": 305.5234375, "epoch": 0.10555050045495905, "format_confidence": 0.5, "grad_norm": 1.1036768563812662, "grounded_proportion": 0.5, "kl": 0.0242919921875, "learning_rate": 8.953594176524113e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.3274608254432678, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4296875, "step": 116 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.5833333333333334, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.16666666666666666, "TT_Document/mode_1": 0.08333333333333333, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5166666666666667, "TT_Math/mode_1": 0.5833333333333334, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.5833333333333334, "TT_Science/mode_1": 0.5833333333333334, "completion_length": 330.74609375, "completion_length/mode_0": 349.3984375, "completion_length/mode_1": 312.09375, "epoch": 0.10646041856232939, "format_confidence": 0.5, "grad_norm": 0.760986746071062, "grounded_proportion": 0.5, "kl": 0.02978515625, "learning_rate": 8.944494995450409e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.1817479431629181, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 117 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.42857142857142855, "TT_Counting/mode_1": 0.6071428571428571, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.45588235294117646, "TT_Math/mode_1": 0.47058823529411764, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.5833333333333334, "TT_Science/mode_1": 0.5833333333333334, "completion_length": 403.1484375, "completion_length/mode_0": 429.703125, "completion_length/mode_1": 376.59375, "epoch": 0.10737033666969972, "format_confidence": 0.5, "grad_norm": 0.8553481604117894, "grounded_proportion": 0.5, "kl": 0.0291748046875, "learning_rate": 8.935395814376706e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.308400422334671, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 118 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.5394736842105263, "TT_Math/mode_1": 0.39473684210526316, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.25, "completion_length": 328.36328125, "completion_length/mode_0": 361.7578125, "completion_length/mode_1": 294.96875, "epoch": 0.10828025477707007, "format_confidence": 0.5, "grad_norm": 1.3318825940512236, "grounded_proportion": 0.5, "kl": 0.033203125, "learning_rate": 8.926296633303002e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.3984375, "reward_std": 0.3040344715118408, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3984375, "step": 119 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.4625, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.4166666666666667, "TT_Science/mode_1": 0.25, "completion_length": 424.55859375, "completion_length/mode_0": 449.5390625, "completion_length/mode_1": 399.578125, "epoch": 0.1091901728844404, "format_confidence": 0.5, "grad_norm": 1.1258840181711312, "grounded_proportion": 0.5, "kl": 0.0235595703125, "learning_rate": 8.917197452229299e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.375, "reward_std": 0.32681170105934143, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.375, "step": 120 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 1.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.75, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.4852941176470588, "TT_Math/mode_1": 0.5588235294117647, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 1.0, "TT_Science/mode_0": 0.4, "TT_Science/mode_1": 0.4, "completion_length": 337.171875, "completion_length/mode_0": 355.90625, "completion_length/mode_1": 318.4375, "epoch": 0.11010009099181074, "format_confidence": 0.5, "grad_norm": 1.8863415714384077, "grounded_proportion": 0.5, "kl": 0.029541015625, "learning_rate": 8.908098271155595e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.4375, "reward_std": 0.337970107793808, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4375, "step": 121 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.1875, "TT_Math/mode_0": 0.6, "TT_Math/mode_1": 0.5333333333333333, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 318.25, "completion_length/mode_0": 324.25, "completion_length/mode_1": 312.25, "epoch": 0.11101000909918107, "format_confidence": 0.5, "grad_norm": 0.7478850656639994, "grounded_proportion": 0.5, "kl": 0.0390625, "learning_rate": 8.898999090081893e-07, "loss": 0.0016, "over_lengthy_sequences": 0.00390625, "reward": 1.36328125, "reward_std": 0.24553291499614716, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3671875, "step": 122 }, { "TT_Chart/mode_0": 0.3, "TT_Chart/mode_1": 0.15, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.16666666666666666, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.39285714285714285, "TT_Math/mode_1": 0.3392857142857143, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.25, "completion_length": 299.01171875, "completion_length/mode_0": 332.2109375, "completion_length/mode_1": 265.8125, "epoch": 0.11191992720655142, "format_confidence": 0.5, "grad_norm": 1.0087357565512716, "grounded_proportion": 0.5, "kl": 0.0294189453125, "learning_rate": 8.889899909008188e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.2734375, "reward_std": 0.3007756471633911, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.27734375, "step": 123 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.3333333333333333, "TT_Math/mode_0": 0.4027777777777778, "TT_Math/mode_1": 0.4583333333333333, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 0.375, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.4375, "TT_Science/mode_1": 0.5, "completion_length": 372.5859375, "completion_length/mode_0": 401.2109375, "completion_length/mode_1": 343.9609375, "epoch": 0.11282984531392175, "format_confidence": 0.5, "grad_norm": 0.8137821391046608, "grounded_proportion": 0.5, "kl": 0.041015625, "learning_rate": 8.880800727934485e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.2759014368057251, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.39453125, "step": 124 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.5166666666666667, "TT_Math/mode_1": 0.55, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.125, "completion_length": 355.76171875, "completion_length/mode_0": 365.4296875, "completion_length/mode_1": 346.09375, "epoch": 0.11373976342129208, "format_confidence": 0.5, "grad_norm": 0.7654413326806154, "grounded_proportion": 0.5, "kl": 0.036865234375, "learning_rate": 8.871701546860783e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0078125, "reward": 1.33984375, "reward_std": 0.26253271102905273, "rewards/format_reward": 0.9921875, "rewards/general_task_reward": 0.34765625, "step": 125 }, { "TT_Chart/mode_0": 0.3333333333333333, "TT_Chart/mode_1": 0.3333333333333333, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.525, "TT_Math/mode_1": 0.45, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.75, "completion_length": 371.875, "completion_length/mode_0": 405.6640625, "completion_length/mode_1": 338.0859375, "epoch": 0.11464968152866242, "format_confidence": 0.5, "grad_norm": 0.6049711014702803, "grounded_proportion": 0.5, "kl": 0.036376953125, "learning_rate": 8.862602365787079e-07, "loss": 0.0015, "over_lengthy_sequences": 0.00390625, "reward": 1.4140625, "reward_std": 0.28288590908050537, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.41796875, "step": 126 }, { "TT_Chart/mode_0": 0.4, "TT_Chart/mode_1": 0.4, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.75, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4444444444444444, "TT_Math/mode_1": 0.5277777777777778, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 325.46484375, "completion_length/mode_0": 355.6796875, "completion_length/mode_1": 295.25, "epoch": 0.11555959963603275, "format_confidence": 0.5, "grad_norm": 1.8410793961989773, "grounded_proportion": 0.5, "kl": 0.033203125, "learning_rate": 8.853503184713375e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.39453125, "reward_std": 0.2690715193748474, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.39453125, "step": 127 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.75, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.5625, "TT_Math/mode_1": 0.515625, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5416666666666666, "TT_Science/mode_1": 0.5, "completion_length": 320.73046875, "completion_length/mode_0": 337.2265625, "completion_length/mode_1": 304.234375, "epoch": 0.1164695177434031, "format_confidence": 0.5, "grad_norm": 0.6389432250465099, "grounded_proportion": 0.5, "kl": 0.0242919921875, "learning_rate": 8.844404003639672e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.23922216892242432, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 128 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.4375, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6666666666666666, "TT_Math/mode_1": 0.5166666666666667, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.3333333333333333, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.75, "completion_length": 406.078125, "completion_length/mode_0": 416.359375, "completion_length/mode_1": 395.796875, "epoch": 0.11737943585077343, "format_confidence": 0.5, "grad_norm": 0.5631128640852374, "grounded_proportion": 0.5, "kl": 0.038818359375, "learning_rate": 8.835304822565969e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.48828125, "reward_std": 0.26944732666015625, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.48828125, "step": 129 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.4375, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.6, "TT_Math/mode_1": 0.6, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.3333333333333333, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 353.71484375, "completion_length/mode_0": 367.8984375, "completion_length/mode_1": 339.53125, "epoch": 0.11828935395814377, "format_confidence": 0.5, "grad_norm": 1.1261868640139805, "grounded_proportion": 0.5, "kl": 0.032470703125, "learning_rate": 8.826205641492264e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.4453125, "reward_std": 0.2877512276172638, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4453125, "step": 130 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.375, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.46875, "TT_Math/mode_1": 0.40625, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 1.0, "completion_length": 301.109375, "completion_length/mode_0": 332.0390625, "completion_length/mode_1": 270.1796875, "epoch": 0.1191992720655141, "format_confidence": 0.5, "grad_norm": 1.3174322812929569, "grounded_proportion": 0.5, "kl": 0.05322265625, "learning_rate": 8.817106460418562e-07, "loss": 0.0021, "over_lengthy_sequences": 0.0, "reward": 1.3671875, "reward_std": 0.23277445137500763, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3671875, "step": 131 }, { "TT_Chart/mode_0": 0.39285714285714285, "TT_Chart/mode_1": 0.35714285714285715, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.375, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5555555555555556, "TT_Math/mode_1": 0.5277777777777778, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.75, "completion_length": 325.77734375, "completion_length/mode_0": 339.9296875, "completion_length/mode_1": 311.625, "epoch": 0.12010919017288443, "format_confidence": 0.5, "grad_norm": 0.7069147924688921, "grounded_proportion": 0.5, "kl": 0.022705078125, "learning_rate": 8.808007279344859e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.453125, "reward_std": 0.2563130855560303, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.453125, "step": 132 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.16666666666666666, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.75, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.484375, "TT_Math/mode_1": 0.328125, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5833333333333334, "completion_length": 326.15234375, "completion_length/mode_0": 331.0, "completion_length/mode_1": 321.3046875, "epoch": 0.12101910828025478, "format_confidence": 0.5, "grad_norm": 1.4281295890879266, "grounded_proportion": 0.5, "kl": 0.04443359375, "learning_rate": 8.798908098271155e-07, "loss": 0.0018, "over_lengthy_sequences": 0.0, "reward": 1.40234375, "reward_std": 0.2764293849468231, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40234375, "step": 133 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 0.35, "TT_Counting/mode_1": 0.2, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.5535714285714286, "TT_Math/mode_1": 0.5714285714285714, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.25, "completion_length": 311.26171875, "completion_length/mode_0": 341.046875, "completion_length/mode_1": 281.4765625, "epoch": 0.12192902638762511, "format_confidence": 0.5, "grad_norm": 2.805065330097895, "grounded_proportion": 0.5, "kl": 0.0291748046875, "learning_rate": 8.789808917197452e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.37109375, "reward_std": 0.284593403339386, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37109375, "step": 134 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.6666666666666666, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.5833333333333334, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 375.53125, "completion_length/mode_0": 389.0234375, "completion_length/mode_1": 362.0390625, "epoch": 0.12283894449499545, "format_confidence": 0.5, "grad_norm": 0.7313698173703068, "grounded_proportion": 0.5, "kl": 0.0250244140625, "learning_rate": 8.780709736123748e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.2764318287372589, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4296875, "step": 135 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.55, "TT_Counting/mode_1": 0.35, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.4, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 378.30078125, "completion_length/mode_0": 388.0859375, "completion_length/mode_1": 368.515625, "epoch": 0.12374886260236578, "format_confidence": 0.5, "grad_norm": 1.3879851931562597, "grounded_proportion": 0.5, "kl": 0.02783203125, "learning_rate": 8.771610555050046e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.3515625, "reward_std": 0.29143065214157104, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3515625, "step": 136 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.08333333333333333, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.5625, "TT_Math/mode_1": 0.6041666666666666, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 366.40625, "completion_length/mode_0": 374.453125, "completion_length/mode_1": 358.359375, "epoch": 0.12465878070973613, "format_confidence": 0.5, "grad_norm": 0.7633680054263763, "grounded_proportion": 0.5, "kl": 0.02880859375, "learning_rate": 8.762511373976341e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.47265625, "reward_std": 0.3259276747703552, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.484375, "step": 137 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5238095238095238, "TT_Math/mode_1": 0.4880952380952381, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.5, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.0, "completion_length": 412.8359375, "completion_length/mode_0": 415.609375, "completion_length/mode_1": 410.0625, "epoch": 0.12556869881710647, "format_confidence": 0.5, "grad_norm": 5.3443264326908135, "grounded_proportion": 0.5, "kl": 0.026611328125, "learning_rate": 8.753412192902638e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.3915102481842041, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 138 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.42045454545454547, "TT_Math/mode_1": 0.3181818181818182, "TT_OCR/mode_0": 0.6666666666666666, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.0, "completion_length": 423.390625, "completion_length/mode_0": 448.96875, "completion_length/mode_1": 397.8125, "epoch": 0.1264786169244768, "format_confidence": 0.5, "grad_norm": 6.032568642838766, "grounded_proportion": 0.5, "kl": 0.033203125, "learning_rate": 8.744313011828936e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.36328125, "reward_std": 0.308400422334671, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.36328125, "step": 139 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.4, "TT_Document/mode_1": 0.35, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6875, "TT_Math/mode_1": 0.703125, "TT_OCR/mode_0": 0.6666666666666666, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.041666666666666664, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.0, "completion_length": 343.56640625, "completion_length/mode_0": 358.5234375, "completion_length/mode_1": 328.609375, "epoch": 0.12738853503184713, "format_confidence": 0.5, "grad_norm": 0.6882157506953025, "grounded_proportion": 0.5, "kl": 0.037353515625, "learning_rate": 8.735213830755232e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.4375, "reward_std": 0.19792133569717407, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4375, "step": 140 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5476190476190477, "TT_Math/mode_1": 0.5476190476190477, "TT_OCR/mode_0": 0.6666666666666666, "TT_OCR/mode_1": 0.3333333333333333, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 1.0, "completion_length": 348.94140625, "completion_length/mode_0": 355.53125, "completion_length/mode_1": 342.3515625, "epoch": 0.12829845313921748, "format_confidence": 0.5, "grad_norm": 0.7339055309204625, "grounded_proportion": 0.5, "kl": 0.044189453125, "learning_rate": 8.726114649681528e-07, "loss": 0.0018, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.2690690755844116, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 141 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.375, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4852941176470588, "TT_Math/mode_1": 0.47058823529411764, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.16666666666666666, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.2, "TT_Science/mode_1": 0.25, "completion_length": 386.5390625, "completion_length/mode_0": 398.9375, "completion_length/mode_1": 374.140625, "epoch": 0.1292083712465878, "format_confidence": 0.5, "grad_norm": 0.5546463094726118, "grounded_proportion": 0.5, "kl": 0.0262451171875, "learning_rate": 8.717015468607825e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.33203125, "reward_std": 0.27130940556526184, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.33203125, "step": 142 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.35, "TT_Counting/mode_0": 0.4, "TT_Counting/mode_1": 0.2, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.375, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6041666666666666, "TT_Math/mode_1": 0.5625, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.875, "TT_Science/mode_1": 1.0, "completion_length": 287.44921875, "completion_length/mode_0": 305.828125, "completion_length/mode_1": 269.0703125, "epoch": 0.13011828935395814, "format_confidence": 0.5, "grad_norm": 0.889656894734258, "grounded_proportion": 0.5, "kl": 0.031982421875, "learning_rate": 8.707916287534122e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.4296875, "reward_std": 0.30077171325683594, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4296875, "step": 143 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.2916666666666667, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.375, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.45454545454545453, "TT_Math/mode_1": 0.45454545454545453, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.041666666666666664, "TT_Others/mode_1": 0.2916666666666667, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.3333333333333333, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 300.1171875, "completion_length/mode_0": 312.078125, "completion_length/mode_1": 288.15625, "epoch": 0.13102820746132848, "format_confidence": 0.5, "grad_norm": 0.8824978684287549, "grounded_proportion": 0.5, "kl": 0.03515625, "learning_rate": 8.698817106460417e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.31640625, "reward_std": 0.30931398272514343, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.31640625, "step": 144 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.125, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.375, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.5735294117647058, "TT_Math/mode_1": 0.5147058823529411, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.65, "TT_Science/mode_1": 0.5, "completion_length": 360.56640625, "completion_length/mode_0": 382.359375, "completion_length/mode_1": 338.7734375, "epoch": 0.13193812556869883, "format_confidence": 0.5, "grad_norm": 0.7721949198133805, "grounded_proportion": 0.5, "kl": 0.0274658203125, "learning_rate": 8.689717925386715e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.46875, "reward_std": 0.33547264337539673, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.46875, "step": 145 }, { "TT_Chart/mode_0": 0.875, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4605263157894737, "TT_Math/mode_1": 0.4473684210526316, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 344.33203125, "completion_length/mode_0": 366.6171875, "completion_length/mode_1": 322.046875, "epoch": 0.13284804367606914, "format_confidence": 0.5, "grad_norm": 0.9909534863047289, "grounded_proportion": 0.5, "kl": 0.0238037109375, "learning_rate": 8.680618744313012e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.324045866727829, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 146 }, { "TT_Chart/mode_0": 0.875, "TT_Chart/mode_1": 0.75, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.625, "TT_Math/mode_0": 0.3472222222222222, "TT_Math/mode_1": 0.375, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.3125, "completion_length": 347.421875, "completion_length/mode_0": 357.6640625, "completion_length/mode_1": 337.1796875, "epoch": 0.1337579617834395, "format_confidence": 0.5, "grad_norm": 0.720949141572624, "grounded_proportion": 0.5, "kl": 0.034423828125, "learning_rate": 8.671519563239307e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.328125, "reward_std": 0.2761722803115845, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.328125, "step": 147 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 1.0, "TT_Detection/mode_0": 0.3333333333333333, "TT_Detection/mode_1": 0.5833333333333334, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.6666666666666666, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5681818181818182, "TT_Math/mode_1": 0.5568181818181818, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.3125, "completion_length": 340.26171875, "completion_length/mode_0": 362.578125, "completion_length/mode_1": 317.9453125, "epoch": 0.13466787989080983, "format_confidence": 0.5, "grad_norm": 0.7763799064119764, "grounded_proportion": 0.5, "kl": 0.0400390625, "learning_rate": 8.662420382165605e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.53515625, "reward_std": 0.288013219833374, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.53515625, "step": 148 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.6666666666666666, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.4583333333333333, "TT_Math/mode_1": 0.4583333333333333, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 456.44140625, "completion_length/mode_0": 481.53125, "completion_length/mode_1": 431.3515625, "epoch": 0.13557779799818018, "format_confidence": 0.5, "grad_norm": 0.6076513635609712, "grounded_proportion": 0.5, "kl": 0.03271484375, "learning_rate": 8.653321201091901e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.40625, "reward_std": 0.25460314750671387, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.40625, "step": 149 }, { "TT_Chart/mode_0": 0.35, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.3125, "TT_Detection/mode_1": 0.3125, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.48333333333333334, "TT_Math/mode_1": 0.5333333333333333, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.5, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 337.10546875, "completion_length/mode_0": 345.125, "completion_length/mode_1": 329.0859375, "epoch": 0.1364877161055505, "format_confidence": 0.5, "grad_norm": 0.7513140959836652, "grounded_proportion": 0.5, "kl": 0.048583984375, "learning_rate": 8.644222020018199e-07, "loss": 0.0019, "over_lengthy_sequences": 0.0, "reward": 1.390625, "reward_std": 0.2961748242378235, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.390625, "step": 150 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5972222222222222, "TT_Math/mode_1": 0.6388888888888888, "TT_OCR/mode_0": 0.4375, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 315.36328125, "completion_length/mode_0": 329.984375, "completion_length/mode_1": 300.7421875, "epoch": 0.13739763421292084, "format_confidence": 0.5, "grad_norm": 1.0275263126976402, "grounded_proportion": 0.5, "kl": 0.037841796875, "learning_rate": 8.635122838944494e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.3984375, "reward_std": 0.24172601103782654, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3984375, "step": 151 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.5131578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.625, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 313.54296875, "completion_length/mode_0": 326.3203125, "completion_length/mode_1": 300.765625, "epoch": 0.13830755232029118, "format_confidence": 0.5, "grad_norm": 0.9977927113854368, "grounded_proportion": 0.5, "kl": 0.0299072265625, "learning_rate": 8.626023657870791e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.28182509541511536, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 152 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.5833333333333334, "TT_Detection/mode_0": 0.16666666666666666, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.125, "TT_Math/mode_0": 0.5833333333333334, "TT_Math/mode_1": 0.5555555555555556, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.875, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.5, "completion_length": 390.1796875, "completion_length/mode_0": 406.2421875, "completion_length/mode_1": 374.1171875, "epoch": 0.1392174704276615, "format_confidence": 0.5, "grad_norm": 0.7680531984092493, "grounded_proportion": 0.5, "kl": 0.040771484375, "learning_rate": 8.616924476797089e-07, "loss": 0.0016, "over_lengthy_sequences": 0.00390625, "reward": 1.47265625, "reward_std": 0.30024129152297974, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.4765625, "step": 153 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 1.0, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6666666666666666, "TT_Math/mode_1": 0.5694444444444444, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 256.26171875, "completion_length/mode_0": 282.7109375, "completion_length/mode_1": 229.8125, "epoch": 0.14012738853503184, "format_confidence": 0.5, "grad_norm": 0.8865413491194238, "grounded_proportion": 0.5, "kl": 0.04150390625, "learning_rate": 8.607825295723384e-07, "loss": 0.0017, "over_lengthy_sequences": 0.0, "reward": 1.43359375, "reward_std": 0.2194880098104477, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.43359375, "step": 154 }, { "TT_Chart/mode_0": 0.5416666666666666, "TT_Chart/mode_1": 0.5416666666666666, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.625, "TT_Math/mode_1": 0.575, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.375, "completion_length": 367.86328125, "completion_length/mode_0": 385.09375, "completion_length/mode_1": 350.6328125, "epoch": 0.1410373066424022, "format_confidence": 0.5, "grad_norm": 0.9428079277390882, "grounded_proportion": 0.5, "kl": 0.0299072265625, "learning_rate": 8.598726114649681e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.49609375, "reward_std": 0.32734215259552, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.5, "step": 155 }, { "TT_Chart/mode_0": 0.4, "TT_Chart/mode_1": 0.35, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.6875, "TT_Math/mode_0": 0.515625, "TT_Math/mode_1": 0.484375, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.875, "TT_Science/mode_1": 0.625, "completion_length": 298.578125, "completion_length/mode_0": 303.390625, "completion_length/mode_1": 293.765625, "epoch": 0.14194722474977253, "format_confidence": 0.5, "grad_norm": 2.2779976937162885, "grounded_proportion": 0.5, "kl": 0.06103515625, "learning_rate": 8.589626933575978e-07, "loss": 0.0024, "over_lengthy_sequences": 0.0, "reward": 1.453125, "reward_std": 0.2784101665019989, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.453125, "step": 156 }, { "TT_Chart/mode_0": 0.125, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.625, "TT_Document/mode_1": 0.625, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.6875, "TT_Math/mode_0": 0.6578947368421053, "TT_Math/mode_1": 0.631578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.4166666666666667, "TT_Science/mode_1": 0.75, "completion_length": 419.5, "completion_length/mode_0": 432.984375, "completion_length/mode_1": 406.015625, "epoch": 0.14285714285714285, "format_confidence": 0.5, "grad_norm": 0.8454361433490853, "grounded_proportion": 0.5, "kl": 0.03466796875, "learning_rate": 8.580527752502275e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.5234375, "reward_std": 0.2661820948123932, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.5234375, "step": 157 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 1.0, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.44047619047619047, "TT_Math/mode_1": 0.4523809523809524, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 350.96484375, "completion_length/mode_0": 365.6484375, "completion_length/mode_1": 336.28125, "epoch": 0.1437670609645132, "format_confidence": 0.5, "grad_norm": 0.41622087929958396, "grounded_proportion": 0.5, "kl": 0.03369140625, "learning_rate": 8.57142857142857e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.32421875, "reward_std": 0.14992907643318176, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.32421875, "step": 158 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.1875, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.3, "TT_Math/mode_0": 0.4, "TT_Math/mode_1": 0.3333333333333333, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 299.73828125, "completion_length/mode_0": 326.0546875, "completion_length/mode_1": 273.421875, "epoch": 0.14467697907188354, "format_confidence": 0.5, "grad_norm": 1.0658096903762169, "grounded_proportion": 0.5, "kl": 0.04296875, "learning_rate": 8.562329390354868e-07, "loss": 0.0017, "over_lengthy_sequences": 0.0, "reward": 1.28515625, "reward_std": 0.28972315788269043, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.28515625, "step": 159 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.3333333333333333, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 1.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.4868421052631579, "TT_Math/mode_1": 0.5131578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.16666666666666666, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.625, "TT_Science/mode_1": 0.4375, "completion_length": 399.140625, "completion_length/mode_0": 402.296875, "completion_length/mode_1": 395.984375, "epoch": 0.14558689717925385, "format_confidence": 0.5, "grad_norm": 1.3202914229642293, "grounded_proportion": 0.5, "kl": 0.0294189453125, "learning_rate": 8.553230209281165e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.47265625, "reward_std": 0.2205488383769989, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.47265625, "step": 160 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.16666666666666666, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.625, "TT_Math/mode_1": 0.5892857142857143, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.16666666666666666, "TT_Others/mode_1": 0.4166666666666667, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 324.12109375, "completion_length/mode_0": 342.9296875, "completion_length/mode_1": 305.3125, "epoch": 0.1464968152866242, "format_confidence": 0.5, "grad_norm": 0.9445520944835173, "grounded_proportion": 0.5, "kl": 0.040771484375, "learning_rate": 8.54413102820746e-07, "loss": 0.0016, "over_lengthy_sequences": 0.00390625, "reward": 1.390625, "reward_std": 0.29116618633270264, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.39453125, "step": 161 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.65, "TT_Counting/mode_1": 0.65, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.36666666666666664, "TT_Math/mode_1": 0.43333333333333335, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.1875, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 336.70703125, "completion_length/mode_0": 345.09375, "completion_length/mode_1": 328.3203125, "epoch": 0.14740673339399454, "format_confidence": 0.5, "grad_norm": 1.053194115355607, "grounded_proportion": 0.5, "kl": 0.043701171875, "learning_rate": 8.535031847133758e-07, "loss": 0.0017, "over_lengthy_sequences": 0.0, "reward": 1.3984375, "reward_std": 0.18345540761947632, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.40234375, "step": 162 }, { "TT_Chart/mode_0": 0.16666666666666666, "TT_Chart/mode_1": 0.3333333333333333, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5714285714285714, "TT_Math/mode_1": 0.5833333333333334, "TT_OCR/mode_0": 0.125, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.5833333333333334, "TT_Others/mode_1": 0.4166666666666667, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 394.390625, "completion_length/mode_0": 414.21875, "completion_length/mode_1": 374.5625, "epoch": 0.1483166515013649, "format_confidence": 0.5, "grad_norm": 0.949326413337948, "grounded_proportion": 0.5, "kl": 0.03515625, "learning_rate": 8.525932666060054e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.4765625, "reward_std": 0.2801200747489929, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4765625, "step": 163 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.4166666666666667, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.375, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5375, "TT_Math/mode_1": 0.575, "TT_OCR/mode_0": 0.25, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.08333333333333333, "completion_length": 326.171875, "completion_length/mode_0": 345.1796875, "completion_length/mode_1": 307.1640625, "epoch": 0.1492265696087352, "format_confidence": 0.5, "grad_norm": 1.0393763681729113, "grounded_proportion": 0.5, "kl": 0.042236328125, "learning_rate": 8.516833484986351e-07, "loss": 0.0017, "over_lengthy_sequences": 0.0, "reward": 1.4140625, "reward_std": 0.2548676133155823, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4140625, "step": 164 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.08333333333333333, "TT_Detection/mode_0": 0.4166666666666667, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.375, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6166666666666667, "TT_Math/mode_1": 0.5333333333333333, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.25, "completion_length": 306.5078125, "completion_length/mode_0": 308.4765625, "completion_length/mode_1": 304.5390625, "epoch": 0.15013648771610555, "format_confidence": 0.5, "grad_norm": 1.4513886148665085, "grounded_proportion": 0.5, "kl": 0.039306640625, "learning_rate": 8.507734303912647e-07, "loss": 0.0016, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.3153514266014099, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 165 }, { "TT_Chart/mode_0": 0.625, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.08333333333333333, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.36538461538461536, "TT_Math/mode_1": 0.40384615384615385, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.16666666666666666, "TT_Others/mode_1": 0.3333333333333333, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.08333333333333333, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 286.8515625, "completion_length/mode_0": 297.140625, "completion_length/mode_1": 276.5625, "epoch": 0.1510464058234759, "format_confidence": 0.5, "grad_norm": 1.232171973166691, "grounded_proportion": 0.5, "kl": 0.0673828125, "learning_rate": 8.498635122838944e-07, "loss": 0.0027, "over_lengthy_sequences": 0.0, "reward": 1.2578125, "reward_std": 0.3183930218219757, "rewards/format_reward": 0.98828125, "rewards/general_task_reward": 0.26953125, "step": 166 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.4166666666666667, "TT_Detection/mode_1": 0.4166666666666667, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5833333333333334, "TT_Math/mode_1": 0.5277777777777778, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.0, "TT_Puzzle/mode_1": 0.0, "TT_Science/mode_0": 0.08333333333333333, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 284.21484375, "completion_length/mode_0": 311.2109375, "completion_length/mode_1": 257.21875, "epoch": 0.15195632393084624, "format_confidence": 0.5, "grad_norm": 0.9435576850130559, "grounded_proportion": 0.5, "kl": 0.050048828125, "learning_rate": 8.489535941765242e-07, "loss": 0.002, "over_lengthy_sequences": 0.0, "reward": 1.4140625, "reward_std": 0.2546031177043915, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4140625, "step": 167 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.3125, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.3977272727272727, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 422.70703125, "completion_length/mode_0": 433.296875, "completion_length/mode_1": 412.1171875, "epoch": 0.15286624203821655, "format_confidence": 0.5, "grad_norm": 0.6448597730106973, "grounded_proportion": 0.5, "kl": 0.037109375, "learning_rate": 8.480436760691537e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.37109375, "reward_std": 0.27813929319381714, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.37109375, "step": 168 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.4375, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.609375, "TT_Math/mode_1": 0.578125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.08333333333333333, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 337.109375, "completion_length/mode_0": 346.390625, "completion_length/mode_1": 327.828125, "epoch": 0.1537761601455869, "format_confidence": 0.5, "grad_norm": 0.7151475312153581, "grounded_proportion": 0.5, "kl": 0.044921875, "learning_rate": 8.471337579617834e-07, "loss": 0.0018, "over_lengthy_sequences": 0.0, "reward": 1.39453125, "reward_std": 0.229889914393425, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.39453125, "step": 169 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.1875, "TT_Counting/mode_0": 1.0, "TT_Counting/mode_1": 1.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.28125, "TT_Math/mode_1": 0.296875, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.3, "completion_length": 304.55078125, "completion_length/mode_0": 310.8515625, "completion_length/mode_1": 298.25, "epoch": 0.15468607825295724, "format_confidence": 0.5, "grad_norm": 0.5636664720948796, "grounded_proportion": 0.5, "kl": 0.03173828125, "learning_rate": 8.462238398544131e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.30078125, "reward_std": 0.17939773201942444, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.30078125, "step": 170 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5555555555555556, "TT_Math/mode_1": 0.5555555555555556, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5833333333333334, "TT_Science/mode_1": 0.5, "completion_length": 359.69140625, "completion_length/mode_0": 369.3984375, "completion_length/mode_1": 349.984375, "epoch": 0.15559599636032756, "format_confidence": 0.5, "grad_norm": 1.0708410982269836, "grounded_proportion": 0.5, "kl": 0.033935546875, "learning_rate": 8.453139217470428e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.52734375, "reward_std": 0.23448191583156586, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.52734375, "step": 171 }, { "TT_Chart/mode_0": 0.4166666666666667, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.4852941176470588, "TT_Math/mode_1": 0.29411764705882354, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.9166666666666666, "TT_Science/mode_1": 0.8333333333333334, "completion_length": 343.12109375, "completion_length/mode_0": 352.09375, "completion_length/mode_1": 334.1484375, "epoch": 0.1565059144676979, "format_confidence": 0.5, "grad_norm": 0.9973888113787986, "grounded_proportion": 0.5, "kl": 0.046142578125, "learning_rate": 8.444040036396723e-07, "loss": 0.0018, "over_lengthy_sequences": 0.0, "reward": 1.3828125, "reward_std": 0.2741939425468445, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3828125, "step": 172 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 1.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.5096153846153846, "TT_Math/mode_1": 0.5288461538461539, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 1.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 492.6171875, "completion_length/mode_0": 512.953125, "completion_length/mode_1": 472.28125, "epoch": 0.15741583257506825, "format_confidence": 0.5, "grad_norm": 0.8266362533405296, "grounded_proportion": 0.5, "kl": 0.02978515625, "learning_rate": 8.434940855323021e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.48046875, "reward_std": 0.3358907699584961, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.484375, "step": 173 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6547619047619048, "TT_Math/mode_1": 0.6190476190476191, "TT_OCR/mode_0": 0.75, "TT_OCR/mode_1": 0.25, "TT_Others/mode_0": 1.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.0, "completion_length": 374.87890625, "completion_length/mode_0": 395.5234375, "completion_length/mode_1": 354.234375, "epoch": 0.1583257506824386, "format_confidence": 0.5, "grad_norm": 0.6723817621947202, "grounded_proportion": 0.5, "kl": 0.0294189453125, "learning_rate": 8.425841674249318e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.50390625, "reward_std": 0.2964407503604889, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.50390625, "step": 174 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.5833333333333334, "TT_Counting/mode_1": 0.6666666666666666, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.65, "TT_Math/mode_1": 0.575, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.125, "completion_length": 386.2890625, "completion_length/mode_0": 403.7734375, "completion_length/mode_1": 368.8046875, "epoch": 0.1592356687898089, "format_confidence": 0.5, "grad_norm": 0.6645566999099587, "grounded_proportion": 0.5, "kl": 0.0289306640625, "learning_rate": 8.416742493175613e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.5546875, "reward_std": 0.2982693314552307, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.5546875, "step": 175 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.6666666666666666, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5952380952380952, "TT_Math/mode_1": 0.5952380952380952, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.125, "completion_length": 376.30859375, "completion_length/mode_0": 382.4453125, "completion_length/mode_1": 370.171875, "epoch": 0.16014558689717925, "format_confidence": 0.5, "grad_norm": 0.7597524598153307, "grounded_proportion": 0.5, "kl": 0.02978515625, "learning_rate": 8.407643312101911e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.5078125, "reward_std": 0.23277443647384644, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.5078125, "step": 176 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0625, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.7142857142857143, "TT_Math/mode_1": 0.6607142857142857, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.25, "completion_length": 353.76953125, "completion_length/mode_0": 389.421875, "completion_length/mode_1": 318.1171875, "epoch": 0.1610555050045496, "format_confidence": 0.5, "grad_norm": 1.1293887912044984, "grounded_proportion": 0.5, "kl": 0.05078125, "learning_rate": 8.398544131028207e-07, "loss": 0.002, "over_lengthy_sequences": 0.0, "reward": 1.3828125, "reward_std": 0.2761722505092621, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3828125, "step": 177 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.7, "TT_Counting/mode_1": 0.7, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.6022727272727273, "TT_Math/mode_1": 0.5681818181818182, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.0, "completion_length": 306.37109375, "completion_length/mode_0": 315.796875, "completion_length/mode_1": 296.9453125, "epoch": 0.16196542311191992, "format_confidence": 0.5, "grad_norm": 0.7974369345469358, "grounded_proportion": 0.5, "kl": 0.033447265625, "learning_rate": 8.389444949954503e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.5234375, "reward_std": 0.2797393798828125, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.5234375, "step": 178 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.35, "TT_Math/mode_0": 0.5441176470588235, "TT_Math/mode_1": 0.47058823529411764, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.375, "completion_length": 322.7109375, "completion_length/mode_0": 335.171875, "completion_length/mode_1": 310.25, "epoch": 0.16287534121929026, "format_confidence": 0.5, "grad_norm": 0.8987778065024237, "grounded_proportion": 0.5, "kl": 0.034912109375, "learning_rate": 8.3803457688808e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.41796875, "reward_std": 0.2861511707305908, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41796875, "step": 179 }, { "TT_Chart/mode_0": 0.25, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5263157894736842, "TT_Math/mode_1": 0.5394736842105263, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.15, "TT_Others/mode_1": 0.35, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.375, "completion_length": 348.140625, "completion_length/mode_0": 359.0703125, "completion_length/mode_1": 337.2109375, "epoch": 0.1637852593266606, "format_confidence": 0.5, "grad_norm": 0.7343429231062427, "grounded_proportion": 0.5, "kl": 0.03515625, "learning_rate": 8.371246587807097e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.2685386538505554, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 180 }, { "TT_Chart/mode_0": 0.75, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5833333333333334, "TT_Detection/mode_0": 0.5, "TT_Detection/mode_1": 0.875, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5921052631578947, "TT_Math/mode_1": 0.5131578947368421, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.75, "TT_Others/mode_1": 0.6875, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.375, "completion_length": 364.34765625, "completion_length/mode_0": 393.75, "completion_length/mode_1": 334.9453125, "epoch": 0.16469517743403095, "format_confidence": 0.5, "grad_norm": 1.3940683409897012, "grounded_proportion": 0.5, "kl": 0.024658203125, "learning_rate": 8.362147406733395e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.546875, "reward_std": 0.37559884786605835, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.546875, "step": 181 }, { "TT_Chart/mode_0": 0.15, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5125, "TT_Math/mode_1": 0.475, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.75, "TT_Science/mode_1": 0.5, "completion_length": 342.953125, "completion_length/mode_0": 368.9765625, "completion_length/mode_1": 316.9296875, "epoch": 0.16560509554140126, "format_confidence": 0.5, "grad_norm": 0.7261784826800016, "grounded_proportion": 0.5, "kl": 0.0245361328125, "learning_rate": 8.35304822565969e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.41015625, "reward_std": 0.2590813636779785, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.41015625, "step": 182 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.08333333333333333, "TT_Counting/mode_0": 0.5, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.5833333333333334, "TT_Detection/mode_1": 0.5833333333333334, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.38235294117647056, "TT_Math/mode_1": 0.39705882352941174, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.16666666666666666, "TT_Science/mode_1": 0.16666666666666666, "completion_length": 321.29296875, "completion_length/mode_0": 325.5390625, "completion_length/mode_1": 317.046875, "epoch": 0.1665150136487716, "format_confidence": 0.5, "grad_norm": 0.690692859788369, "grounded_proportion": 0.5, "kl": 0.02587890625, "learning_rate": 8.343949044585987e-07, "loss": 0.001, "over_lengthy_sequences": 0.00390625, "reward": 1.31640625, "reward_std": 0.26170387864112854, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.3203125, "step": 183 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.6666666666666666, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.6666666666666666, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.625, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.6029411764705882, "TT_Math/mode_1": 0.5147058823529411, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.125, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.5, "completion_length": 299.9453125, "completion_length/mode_0": 306.9921875, "completion_length/mode_1": 292.8984375, "epoch": 0.16742493175614195, "format_confidence": 0.5, "grad_norm": 0.943115785852982, "grounded_proportion": 0.5, "kl": 0.033447265625, "learning_rate": 8.334849863512284e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.53125, "reward_std": 0.30076679587364197, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.53125, "step": 184 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.35, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.125, "TT_Grounding/mode_1": 0.375, "TT_Math/mode_0": 0.5694444444444444, "TT_Math/mode_1": 0.5416666666666666, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.125, "completion_length": 309.7265625, "completion_length/mode_0": 322.5703125, "completion_length/mode_1": 296.8828125, "epoch": 0.16833484986351227, "format_confidence": 0.5, "grad_norm": 0.8074492550195393, "grounded_proportion": 0.5, "kl": 0.03271484375, "learning_rate": 8.32575068243858e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.38671875, "reward_std": 0.28630331158638, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.38671875, "step": 185 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.8333333333333334, "TT_Counting/mode_0": 0.08333333333333333, "TT_Counting/mode_1": 0.16666666666666666, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.25, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.515625, "TT_Math/mode_1": 0.484375, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.25, "TT_Others/mode_1": 0.08333333333333333, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.125, "TT_Science/mode_1": 0.25, "completion_length": 353.81640625, "completion_length/mode_0": 383.046875, "completion_length/mode_1": 324.5859375, "epoch": 0.16924476797088261, "format_confidence": 0.5, "grad_norm": 3.2195277238429063, "grounded_proportion": 0.5, "kl": 0.035400390625, "learning_rate": 8.316651501364876e-07, "loss": 0.0014, "over_lengthy_sequences": 0.0, "reward": 1.39453125, "reward_std": 0.30312827229499817, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.39453125, "step": 186 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.25, "TT_Counting/mode_0": 0.3333333333333333, "TT_Counting/mode_1": 0.75, "TT_Detection/mode_0": 0.625, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.125, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.35, "TT_Grounding/mode_1": 0.45, "TT_Math/mode_0": 0.578125, "TT_Math/mode_1": 0.5, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.875, "completion_length": 340.77734375, "completion_length/mode_0": 350.6328125, "completion_length/mode_1": 330.921875, "epoch": 0.17015468607825296, "format_confidence": 0.5, "grad_norm": 1.0584895132267007, "grounded_proportion": 0.5, "kl": 0.0311279296875, "learning_rate": 8.307552320291174e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.4609375, "reward_std": 0.35466182231903076, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.46484375, "step": 187 }, { "TT_Chart/mode_0": 0.1875, "TT_Chart/mode_1": 0.1875, "TT_Counting/mode_0": 0.8125, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.625, "TT_Detection/mode_1": 0.25, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.35, "TT_Grounding/mode_1": 0.45, "TT_Math/mode_0": 0.546875, "TT_Math/mode_1": 0.453125, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.6, "TT_Science/mode_1": 0.45, "completion_length": 340.80859375, "completion_length/mode_0": 366.453125, "completion_length/mode_1": 315.1640625, "epoch": 0.1710646041856233, "format_confidence": 0.5, "grad_norm": 1.7060635808674018, "grounded_proportion": 0.5, "kl": 0.03271484375, "learning_rate": 8.298453139217471e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.49609375, "reward_std": 0.2823604345321655, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.49609375, "step": 188 }, { "TT_Chart/mode_0": 0.5, "TT_Chart/mode_1": 0.45, "TT_Counting/mode_0": 0.125, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.5833333333333334, "TT_Grounding/mode_1": 0.5833333333333334, "TT_Math/mode_0": 0.618421052631579, "TT_Math/mode_1": 0.5789473684210527, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 1.0, "TT_Others/mode_1": 1.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.25, "completion_length": 442.46875, "completion_length/mode_0": 476.4296875, "completion_length/mode_1": 408.5078125, "epoch": 0.17197452229299362, "format_confidence": 0.5, "grad_norm": 1.1713295543987994, "grounded_proportion": 0.5, "kl": 0.0224609375, "learning_rate": 8.289353958143766e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.53515625, "reward_std": 0.30076926946640015, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.53515625, "step": 189 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.375, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.3333333333333333, "TT_Detection/mode_1": 0.3333333333333333, "TT_Document/mode_0": 0.0, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.375, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.475, "TT_OCR/mode_0": 1.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.05, "TT_Others/mode_1": 0.15, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.55, "TT_Science/mode_1": 0.4, "completion_length": 260.03125, "completion_length/mode_0": 268.5, "completion_length/mode_1": 251.5625, "epoch": 0.17288444040036396, "format_confidence": 0.5, "grad_norm": 1.7958755613959498, "grounded_proportion": 0.5, "kl": 0.03857421875, "learning_rate": 8.280254777070064e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.32421875, "reward_std": 0.2951115369796753, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.32421875, "step": 190 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.0, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.25, "TT_Math/mode_0": 0.5125, "TT_Math/mode_1": 0.475, "TT_OCR/mode_0": 0.875, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.25, "TT_Science/mode_1": 0.25, "completion_length": 342.26171875, "completion_length/mode_0": 358.6328125, "completion_length/mode_1": 325.890625, "epoch": 0.1737943585077343, "format_confidence": 0.5, "grad_norm": 1.119909562752138, "grounded_proportion": 0.5, "kl": 0.036376953125, "learning_rate": 8.27115559599636e-07, "loss": 0.0015, "over_lengthy_sequences": 0.0, "reward": 1.421875, "reward_std": 0.29906177520751953, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.421875, "step": 191 }, { "TT_Chart/mode_0": 0.375, "TT_Chart/mode_1": 0.375, "TT_Counting/mode_0": 0.4166666666666667, "TT_Counting/mode_1": 0.25, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.75, "TT_Math/mode_0": 0.4722222222222222, "TT_Math/mode_1": 0.4305555555555556, "TT_OCR/mode_0": 0.625, "TT_OCR/mode_1": 0.125, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 397.421875, "completion_length/mode_0": 413.5703125, "completion_length/mode_1": 381.2734375, "epoch": 0.17470427661510465, "format_confidence": 0.5, "grad_norm": 1.2286906019231645, "grounded_proportion": 0.5, "kl": 0.0311279296875, "learning_rate": 8.262056414922656e-07, "loss": 0.0012, "over_lengthy_sequences": 0.00390625, "reward": 1.41015625, "reward_std": 0.32666200399398804, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.4140625, "step": 192 }, { "TT_Chart/mode_0": 0.35, "TT_Chart/mode_1": 0.5, "TT_Counting/mode_0": 0.625, "TT_Counting/mode_1": 0.375, "TT_Detection/mode_0": 0.6666666666666666, "TT_Detection/mode_1": 0.6666666666666666, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.6666666666666666, "TT_Grounding/mode_1": 0.6666666666666666, "TT_Math/mode_0": 0.4, "TT_Math/mode_1": 0.45, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.25, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.5, "completion_length": 359.20703125, "completion_length/mode_0": 382.375, "completion_length/mode_1": 336.0390625, "epoch": 0.17561419472247497, "format_confidence": 0.5, "grad_norm": 0.909737373388406, "grounded_proportion": 0.5, "kl": 0.033447265625, "learning_rate": 8.252957233848953e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.4765625, "reward_std": 0.3333781063556671, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.4765625, "step": 193 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.75, "TT_Counting/mode_1": 0.625, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 0.75, "TT_Document/mode_1": 0.25, "TT_Grounding/mode_0": 0.0, "TT_Grounding/mode_1": 0.0, "TT_Math/mode_0": 0.5227272727272727, "TT_Math/mode_1": 0.4431818181818182, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.0, "completion_length": 405.328125, "completion_length/mode_0": 413.921875, "completion_length/mode_1": 396.734375, "epoch": 0.17652411282984531, "format_confidence": 0.5, "grad_norm": 1.000256041324093, "grounded_proportion": 0.5, "kl": 0.033935546875, "learning_rate": 8.24385805277525e-07, "loss": 0.0014, "over_lengthy_sequences": 0.00390625, "reward": 1.390625, "reward_std": 0.31116873025894165, "rewards/format_reward": 0.99609375, "rewards/general_task_reward": 0.39453125, "step": 194 }, { "TT_Chart/mode_0": 0.041666666666666664, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.75, "TT_Detection/mode_1": 0.75, "TT_Document/mode_0": 0.5, "TT_Document/mode_1": 0.0, "TT_Grounding/mode_0": 0.625, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.4264705882352941, "TT_Math/mode_1": 0.5735294117647058, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.375, "TT_Science/mode_1": 0.125, "completion_length": 341.66015625, "completion_length/mode_0": 353.7109375, "completion_length/mode_1": 329.609375, "epoch": 0.17743403093721566, "format_confidence": 0.5, "grad_norm": 0.5359895637363522, "grounded_proportion": 0.5, "kl": 0.023681640625, "learning_rate": 8.234758871701548e-07, "loss": 0.0009, "over_lengthy_sequences": 0.0, "reward": 1.3515625, "reward_std": 0.2563130855560303, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3515625, "step": 195 }, { "TT_Chart/mode_0": 0.4166666666666667, "TT_Chart/mode_1": 0.16666666666666666, "TT_Counting/mode_0": 0.6666666666666666, "TT_Counting/mode_1": 0.6666666666666666, "TT_Detection/mode_0": 0.0, "TT_Detection/mode_1": 0.125, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.1875, "TT_Grounding/mode_1": 0.1875, "TT_Math/mode_0": 0.46875, "TT_Math/mode_1": 0.453125, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 0.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 1.0, "TT_Science/mode_1": 0.75, "completion_length": 283.7734375, "completion_length/mode_0": 304.7109375, "completion_length/mode_1": 262.8359375, "epoch": 0.17834394904458598, "format_confidence": 0.5, "grad_norm": 9.681747982191952, "grounded_proportion": 0.5, "kl": 0.0322265625, "learning_rate": 8.225659690627843e-07, "loss": 0.0013, "over_lengthy_sequences": 0.0, "reward": 1.3984375, "reward_std": 0.28630968928337097, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3984375, "step": 196 }, { "TT_Chart/mode_0": 1.0, "TT_Chart/mode_1": 1.0, "TT_Counting/mode_0": 0.25, "TT_Counting/mode_1": 0.5, "TT_Detection/mode_0": 0.25, "TT_Detection/mode_1": 0.16666666666666666, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 0.5, "TT_Grounding/mode_0": 0.5, "TT_Grounding/mode_1": 0.5, "TT_Math/mode_0": 0.35714285714285715, "TT_Math/mode_1": 0.30952380952380953, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.5, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.0, "TT_Science/mode_1": 0.25, "completion_length": 424.5859375, "completion_length/mode_0": 437.5, "completion_length/mode_1": 411.671875, "epoch": 0.17925386715195632, "format_confidence": 0.5, "grad_norm": 0.5628841267736974, "grounded_proportion": 0.5, "kl": 0.0281982421875, "learning_rate": 8.21656050955414e-07, "loss": 0.0011, "over_lengthy_sequences": 0.0, "reward": 1.3515625, "reward_std": 0.23250606656074524, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3515625, "step": 197 }, { "TT_Chart/mode_0": 0.08333333333333333, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.125, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.5, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.625, "TT_Grounding/mode_1": 0.875, "TT_Math/mode_0": 0.4722222222222222, "TT_Math/mode_1": 0.4861111111111111, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.4166666666666667, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.5, "TT_Science/mode_1": 0.0, "completion_length": 304.98046875, "completion_length/mode_0": 306.78125, "completion_length/mode_1": 303.1796875, "epoch": 0.18016378525932666, "format_confidence": 0.5, "grad_norm": 1.2573499359794245, "grounded_proportion": 0.5, "kl": 0.0247802734375, "learning_rate": 8.207461328480437e-07, "loss": 0.001, "over_lengthy_sequences": 0.0, "reward": 1.42578125, "reward_std": 0.24461543560028076, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.42578125, "step": 198 }, { "TT_Chart/mode_0": 0.6666666666666666, "TT_Chart/mode_1": 0.6666666666666666, "TT_Counting/mode_0": 0.0, "TT_Counting/mode_1": 0.0, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 1.0, "TT_Document/mode_1": 1.0, "TT_Grounding/mode_0": 0.16666666666666666, "TT_Grounding/mode_1": 0.16666666666666666, "TT_Math/mode_0": 0.5, "TT_Math/mode_1": 0.5333333333333333, "TT_OCR/mode_0": 0.5, "TT_OCR/mode_1": 0.375, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.0, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.625, "TT_Science/mode_1": 0.375, "completion_length": 366.18359375, "completion_length/mode_0": 374.296875, "completion_length/mode_1": 358.0703125, "epoch": 0.181073703366697, "format_confidence": 0.5, "grad_norm": 0.763912879776671, "grounded_proportion": 0.5, "kl": 0.0439453125, "learning_rate": 8.198362147406733e-07, "loss": 0.0018, "over_lengthy_sequences": 0.0, "reward": 1.45703125, "reward_std": 0.3085215389728546, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.45703125, "step": 199 }, { "TT_Chart/mode_0": 0.0, "TT_Chart/mode_1": 0.0, "TT_Counting/mode_0": 0.4375, "TT_Counting/mode_1": 0.4375, "TT_Detection/mode_0": 0.125, "TT_Detection/mode_1": 0.625, "TT_Document/mode_0": 0.25, "TT_Document/mode_1": 0.75, "TT_Grounding/mode_0": 1.0, "TT_Grounding/mode_1": 1.0, "TT_Math/mode_0": 0.39285714285714285, "TT_Math/mode_1": 0.40476190476190477, "TT_OCR/mode_0": 0.0, "TT_OCR/mode_1": 1.0, "TT_Others/mode_0": 0.0, "TT_Others/mode_1": 0.125, "TT_Puzzle/mode_0": 0.75, "TT_Puzzle/mode_1": 0.5, "TT_Science/mode_0": 0.625, "TT_Science/mode_1": 0.375, "completion_length": 342.05859375, "completion_length/mode_0": 350.3359375, "completion_length/mode_1": 333.78125, "epoch": 0.18198362147406733, "format_confidence": 0.5, "grad_norm": 1.010417481464837, "grounded_proportion": 0.5, "kl": 0.0294189453125, "learning_rate": 8.189262966333029e-07, "loss": 0.0012, "over_lengthy_sequences": 0.0, "reward": 1.3828125, "reward_std": 0.2797393798828125, "rewards/format_reward": 1.0, "rewards/general_task_reward": 0.3828125, "step": 200 } ], "logging_steps": 1.0, "max_steps": 1099, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }