| { |
| "best_metric": 0.34333334282040595, |
| "best_model_checkpoint": "/mnt/data/user/zhao_jun/tangjixin/output/model/qwen2.5vl-7b-grpo_new_v20_5k/v13-20250325-021847/checkpoint-2475", |
| "epoch": 1.0, |
| "eval_steps": 250, |
| "global_step": 2475, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.125, |
| "epoch": 0.00040404040404040404, |
| "grad_norm": 1.364031546421686, |
| "kl": 0.0, |
| "learning_rate": 1.6129032258064515e-09, |
| "loss": -0.0474996417760849, |
| "memory(GiB)": 81.93, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333432674408, |
| "reward_std": 0.25746434926986694, |
| "rewards/MultiModalAccuracyORM": 0.2083333432674408, |
| "step": 1, |
| "train_speed(iter/s)": 0.005983 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 304.95833945274353, |
| "epoch": 0.00202020202020202, |
| "grad_norm": 1.6130071483346196, |
| "kl": 0.00015279650688171387, |
| "learning_rate": 8.064516129032257e-09, |
| "loss": -0.0010303221642971039, |
| "memory(GiB)": 86.73, |
| "response_clip_ratio": 0.0, |
| "reward": 0.052083334885537624, |
| "reward_std": 0.13339675217866898, |
| "rewards/MultiModalAccuracyORM": 0.052083334885537624, |
| "step": 5, |
| "train_speed(iter/s)": 0.019266 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.46667594909667, |
| "epoch": 0.00404040404040404, |
| "grad_norm": 1.760454082663187, |
| "kl": 0.000270843505859375, |
| "learning_rate": 1.6129032258064514e-08, |
| "loss": 0.005405974388122558, |
| "memory(GiB)": 87.09, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166667312383652, |
| "reward_std": 0.26492767333984374, |
| "rewards/MultiModalAccuracyORM": 0.14166667312383652, |
| "step": 10, |
| "train_speed(iter/s)": 0.026623 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 452.308349609375, |
| "epoch": 0.006060606060606061, |
| "grad_norm": 1.1507264780517972, |
| "kl": 0.0002508640289306641, |
| "learning_rate": 2.4193548387096773e-08, |
| "loss": 0.013352996110916138, |
| "memory(GiB)": 87.09, |
| "response_clip_ratio": 0.02500000074505806, |
| "reward": 0.34166667610406876, |
| "reward_std": 0.36744636595249175, |
| "rewards/MultiModalAccuracyORM": 0.34166667610406876, |
| "step": 15, |
| "train_speed(iter/s)": 0.027725 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 291.9916717529297, |
| "epoch": 0.00808080808080808, |
| "grad_norm": 1.9440298564534324, |
| "kl": 0.00028104782104492186, |
| "learning_rate": 3.225806451612903e-08, |
| "loss": 0.006416285037994384, |
| "memory(GiB)": 87.09, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333373069763, |
| "reward_std": 0.2916341096162796, |
| "rewards/MultiModalAccuracyORM": 0.2833333373069763, |
| "step": 20, |
| "train_speed(iter/s)": 0.031051 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.5500061035156, |
| "epoch": 0.010101010101010102, |
| "grad_norm": 1.6907685802618988, |
| "kl": 0.0002666950225830078, |
| "learning_rate": 4.032258064516129e-08, |
| "loss": -0.018301564455032348, |
| "memory(GiB)": 87.09, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833334624767306, |
| "reward_std": 0.3720185041427612, |
| "rewards/MultiModalAccuracyORM": 0.30833334624767306, |
| "step": 25, |
| "train_speed(iter/s)": 0.032339 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.2333450317383, |
| "epoch": 0.012121212121212121, |
| "grad_norm": 1.5722363224769262, |
| "kl": 0.0002593994140625, |
| "learning_rate": 4.8387096774193546e-08, |
| "loss": -0.027563482522964478, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000596046446, |
| "reward_std": 0.3226982891559601, |
| "rewards/MultiModalAccuracyORM": 0.25000000596046446, |
| "step": 30, |
| "train_speed(iter/s)": 0.032649 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 398.5916778564453, |
| "epoch": 0.014141414141414142, |
| "grad_norm": 2.304234213678912, |
| "kl": 0.00022954940795898436, |
| "learning_rate": 5.645161290322581e-08, |
| "loss": 0.048061671853065493, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.1416666716337204, |
| "reward_std": 0.3226627051830292, |
| "rewards/MultiModalAccuracyORM": 0.1416666716337204, |
| "step": 35, |
| "train_speed(iter/s)": 0.033014 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 274.97500972747804, |
| "epoch": 0.01616161616161616, |
| "grad_norm": 1.6894032790709004, |
| "kl": 0.0002648591995239258, |
| "learning_rate": 6.451612903225806e-08, |
| "loss": 0.012092837691307068, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666753590107, |
| "reward_std": 0.222271066904068, |
| "rewards/MultiModalAccuracyORM": 0.2666666753590107, |
| "step": 40, |
| "train_speed(iter/s)": 0.034411 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 421.9333435058594, |
| "epoch": 0.01818181818181818, |
| "grad_norm": 1.9171038477045215, |
| "kl": 0.00023059844970703126, |
| "learning_rate": 7.258064516129032e-08, |
| "loss": -0.0132610023021698, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333879709244, |
| "reward_std": 0.2489179015159607, |
| "rewards/MultiModalAccuracyORM": 0.15833333879709244, |
| "step": 45, |
| "train_speed(iter/s)": 0.034702 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 444.20001525878905, |
| "epoch": 0.020202020202020204, |
| "grad_norm": 1.795783985834061, |
| "kl": 0.00021610260009765624, |
| "learning_rate": 8.064516129032257e-08, |
| "loss": 0.055432689189910886, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333730697633, |
| "reward_std": 0.320406436920166, |
| "rewards/MultiModalAccuracyORM": 0.13333333730697633, |
| "step": 50, |
| "train_speed(iter/s)": 0.034713 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 271.8500068664551, |
| "epoch": 0.022222222222222223, |
| "grad_norm": 1.570392013394559, |
| "kl": 0.00024003982543945311, |
| "learning_rate": 8.870967741935484e-08, |
| "loss": 0.0527652382850647, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000968575477, |
| "reward_std": 0.24862808585166932, |
| "rewards/MultiModalAccuracyORM": 0.17500000968575477, |
| "step": 55, |
| "train_speed(iter/s)": 0.035397 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 240.03333892822266, |
| "epoch": 0.024242424242424242, |
| "grad_norm": 1.7404447091659765, |
| "kl": 0.00024061203002929689, |
| "learning_rate": 9.677419354838709e-08, |
| "loss": -0.06867231130599975, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667982935904, |
| "reward_std": 0.33052347004413607, |
| "rewards/MultiModalAccuracyORM": 0.39166667982935904, |
| "step": 60, |
| "train_speed(iter/s)": 0.036121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 449.5083480834961, |
| "epoch": 0.026262626262626262, |
| "grad_norm": 1.770871195621109, |
| "kl": 0.0002596855163574219, |
| "learning_rate": 1.0483870967741934e-07, |
| "loss": 0.019220371544361115, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.1416666701436043, |
| "reward_std": 0.27753120064735415, |
| "rewards/MultiModalAccuracyORM": 0.1416666701436043, |
| "step": 65, |
| "train_speed(iter/s)": 0.035829 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.05834197998047, |
| "epoch": 0.028282828282828285, |
| "grad_norm": 1.1236406922162803, |
| "kl": 0.00025534629821777344, |
| "learning_rate": 1.1290322580645162e-07, |
| "loss": 0.006563323736190796, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833334252238274, |
| "reward_std": 0.18108985424041749, |
| "rewards/MultiModalAccuracyORM": 0.15833334252238274, |
| "step": 70, |
| "train_speed(iter/s)": 0.036273 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.05833969116213, |
| "epoch": 0.030303030303030304, |
| "grad_norm": 2.2244576725130276, |
| "kl": 0.00026721954345703124, |
| "learning_rate": 1.2096774193548387e-07, |
| "loss": 0.021188412606716157, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.3494287371635437, |
| "rewards/MultiModalAccuracyORM": 0.28333333805203437, |
| "step": 75, |
| "train_speed(iter/s)": 0.036577 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.70000381469725, |
| "epoch": 0.03232323232323232, |
| "grad_norm": 2.238393674944575, |
| "kl": 0.00026388168334960936, |
| "learning_rate": 1.2903225806451611e-07, |
| "loss": 0.029351598024368285, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.01666666716337204, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.279270276427269, |
| "rewards/MultiModalAccuracyORM": 0.22500000521540642, |
| "step": 80, |
| "train_speed(iter/s)": 0.036263 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 245.05000381469728, |
| "epoch": 0.03434343434343434, |
| "grad_norm": 1.5092959560425367, |
| "kl": 0.00028471946716308595, |
| "learning_rate": 1.3709677419354838e-07, |
| "loss": -0.036607831716537476, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334177732467, |
| "reward_std": 0.39707074165344236, |
| "rewards/MultiModalAccuracyORM": 0.28333334177732467, |
| "step": 85, |
| "train_speed(iter/s)": 0.035112 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.3000152587891, |
| "epoch": 0.03636363636363636, |
| "grad_norm": 1.983727747725694, |
| "kl": 0.0002570152282714844, |
| "learning_rate": 1.4516129032258064e-07, |
| "loss": 0.02973529100418091, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000447034836, |
| "reward_std": 0.27928483188152314, |
| "rewards/MultiModalAccuracyORM": 0.17500000447034836, |
| "step": 90, |
| "train_speed(iter/s)": 0.035019 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 420.7333511352539, |
| "epoch": 0.03838383838383838, |
| "grad_norm": 1.6243054678942601, |
| "kl": 0.00022783279418945313, |
| "learning_rate": 1.5322580645161288e-07, |
| "loss": -0.030441620945930482, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.35868159830570223, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 95, |
| "train_speed(iter/s)": 0.035038 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 320.6583419799805, |
| "epoch": 0.04040404040404041, |
| "grad_norm": 1.5278965004190905, |
| "kl": 0.00023970603942871093, |
| "learning_rate": 1.6129032258064515e-07, |
| "loss": 0.014825087785720826, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333417773247, |
| "reward_std": 0.24560283720493317, |
| "rewards/MultiModalAccuracyORM": 0.3833333417773247, |
| "step": 100, |
| "train_speed(iter/s)": 0.035336 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.6000091552734, |
| "epoch": 0.04242424242424243, |
| "grad_norm": 2.275003739183734, |
| "kl": 0.0002989768981933594, |
| "learning_rate": 1.6935483870967741e-07, |
| "loss": 0.021370184421539307, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333410322666, |
| "reward_std": 0.31520852744579314, |
| "rewards/MultiModalAccuracyORM": 0.3083333410322666, |
| "step": 105, |
| "train_speed(iter/s)": 0.035535 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.37500915527346, |
| "epoch": 0.044444444444444446, |
| "grad_norm": 1.3264840189361857, |
| "kl": 0.00028629302978515624, |
| "learning_rate": 1.7741935483870968e-07, |
| "loss": 0.013422733545303345, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1416666701436043, |
| "reward_std": 0.24885829985141755, |
| "rewards/MultiModalAccuracyORM": 0.1416666701436043, |
| "step": 110, |
| "train_speed(iter/s)": 0.035867 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.7583488464355, |
| "epoch": 0.046464646464646465, |
| "grad_norm": 0.0068729642108505875, |
| "kl": 0.00022754669189453124, |
| "learning_rate": 1.8548387096774192e-07, |
| "loss": 0.007101482152938843, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.01666666716337204, |
| "reward": 0.24166667386889457, |
| "reward_std": 0.23854664266109465, |
| "rewards/MultiModalAccuracyORM": 0.24166667386889457, |
| "step": 115, |
| "train_speed(iter/s)": 0.035529 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.0500122070313, |
| "epoch": 0.048484848484848485, |
| "grad_norm": 1.666888807483155, |
| "kl": 0.00029277801513671875, |
| "learning_rate": 1.9354838709677418e-07, |
| "loss": -0.013055479526519776, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833334028720857, |
| "reward_std": 0.24041947722434998, |
| "rewards/MultiModalAccuracyORM": 0.20833334028720857, |
| "step": 120, |
| "train_speed(iter/s)": 0.035843 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.0916717529297, |
| "epoch": 0.050505050505050504, |
| "grad_norm": 3.6057797063570765, |
| "kl": 0.00020406246185302734, |
| "learning_rate": 2e-07, |
| "loss": 0.029223644733428956, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000052154064, |
| "reward_std": 0.27371591329574585, |
| "rewards/MultiModalAccuracyORM": 0.2750000052154064, |
| "step": 125, |
| "train_speed(iter/s)": 0.036026 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 488.7583526611328, |
| "epoch": 0.052525252525252523, |
| "grad_norm": 1.7900187922950372, |
| "kl": 0.00025043487548828127, |
| "learning_rate": 2e-07, |
| "loss": 0.0551780104637146, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333484828472, |
| "reward_std": 0.3641817569732666, |
| "rewards/MultiModalAccuracyORM": 0.2833333484828472, |
| "step": 130, |
| "train_speed(iter/s)": 0.036075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.5000072479248, |
| "epoch": 0.05454545454545454, |
| "grad_norm": 2.529917707084592, |
| "kl": 0.0002529144287109375, |
| "learning_rate": 2e-07, |
| "loss": 0.02438216805458069, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.3416666708886623, |
| "reward_std": 0.279270276427269, |
| "rewards/MultiModalAccuracyORM": 0.3416666708886623, |
| "step": 135, |
| "train_speed(iter/s)": 0.036092 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.6333465576172, |
| "epoch": 0.05656565656565657, |
| "grad_norm": 1.3049814649570146, |
| "kl": 0.0002875804901123047, |
| "learning_rate": 2e-07, |
| "loss": -0.022501662373542786, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333469927311, |
| "reward_std": 0.34958777129650115, |
| "rewards/MultiModalAccuracyORM": 0.3833333469927311, |
| "step": 140, |
| "train_speed(iter/s)": 0.036095 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.41668395996095, |
| "epoch": 0.05858585858585859, |
| "grad_norm": 1.8437868971897566, |
| "kl": 0.00023627281188964844, |
| "learning_rate": 2e-07, |
| "loss": 0.06273630857467652, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.3666666768491268, |
| "reward_std": 0.3914994150400162, |
| "rewards/MultiModalAccuracyORM": 0.3666666768491268, |
| "step": 145, |
| "train_speed(iter/s)": 0.036226 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 266.51667098999025, |
| "epoch": 0.06060606060606061, |
| "grad_norm": 1.0785517011291799, |
| "kl": 0.00021938085556030273, |
| "learning_rate": 2e-07, |
| "loss": 0.02771698534488678, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.400000012665987, |
| "reward_std": 0.3516494154930115, |
| "rewards/MultiModalAccuracyORM": 0.400000012665987, |
| "step": 150, |
| "train_speed(iter/s)": 0.036427 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 333.3500152587891, |
| "epoch": 0.06262626262626263, |
| "grad_norm": 12.619972342482905, |
| "kl": 0.00030460357666015623, |
| "learning_rate": 2e-07, |
| "loss": -0.06058757305145264, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000074505806, |
| "reward_std": 0.37600439190864565, |
| "rewards/MultiModalAccuracyORM": 0.2250000074505806, |
| "step": 155, |
| "train_speed(iter/s)": 0.036609 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.7416732788086, |
| "epoch": 0.06464646464646465, |
| "grad_norm": 1.306377595968382, |
| "kl": 0.00027475357055664065, |
| "learning_rate": 2e-07, |
| "loss": -0.00979010909795761, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.15833333432674407, |
| "reward_std": 0.28456337153911593, |
| "rewards/MultiModalAccuracyORM": 0.15833333432674407, |
| "step": 160, |
| "train_speed(iter/s)": 0.036431 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.9916763305664, |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.9830762972924579, |
| "kl": 0.00030498504638671876, |
| "learning_rate": 2e-07, |
| "loss": -0.008201467990875243, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.2260383188724518, |
| "rewards/MultiModalAccuracyORM": 0.10000000149011612, |
| "step": 165, |
| "train_speed(iter/s)": 0.036665 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 249.37500610351563, |
| "epoch": 0.06868686868686869, |
| "grad_norm": 2.1917101699979287, |
| "kl": 0.00025620460510253904, |
| "learning_rate": 2e-07, |
| "loss": 0.016992685198783875, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000037252903, |
| "reward_std": 0.330559054017067, |
| "rewards/MultiModalAccuracyORM": 0.3500000037252903, |
| "step": 170, |
| "train_speed(iter/s)": 0.036951 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.87500762939453, |
| "epoch": 0.0707070707070707, |
| "grad_norm": 1.0748542635448965, |
| "kl": 0.0002711296081542969, |
| "learning_rate": 2e-07, |
| "loss": 0.010954010486602783, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.22400068640708923, |
| "rewards/MultiModalAccuracyORM": 0.20833333656191827, |
| "step": 175, |
| "train_speed(iter/s)": 0.037203 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 313.62500762939453, |
| "epoch": 0.07272727272727272, |
| "grad_norm": 2.2725379948331543, |
| "kl": 0.00025653839111328125, |
| "learning_rate": 2e-07, |
| "loss": 0.03469780087471008, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666667237877847, |
| "reward_std": 0.3332285821437836, |
| "rewards/MultiModalAccuracyORM": 0.16666667237877847, |
| "step": 180, |
| "train_speed(iter/s)": 0.037355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 271.96667327880857, |
| "epoch": 0.07474747474747474, |
| "grad_norm": 1.4486054691502512, |
| "kl": 0.0002918243408203125, |
| "learning_rate": 2e-07, |
| "loss": -0.009673595428466797, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333373069763, |
| "reward_std": 0.102961727976799, |
| "rewards/MultiModalAccuracyORM": 0.3083333373069763, |
| "step": 185, |
| "train_speed(iter/s)": 0.037564 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.25834197998046, |
| "epoch": 0.07676767676767676, |
| "grad_norm": 3.170971594101629, |
| "kl": 0.00025038719177246095, |
| "learning_rate": 2e-07, |
| "loss": 0.0012440800666809082, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.01666666716337204, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.30789810717105864, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 190, |
| "train_speed(iter/s)": 0.037415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.62500991821287, |
| "epoch": 0.07878787878787878, |
| "grad_norm": 1.98318367969525, |
| "kl": 0.00029687881469726564, |
| "learning_rate": 2e-07, |
| "loss": 0.008435648679733277, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.24741607010364533, |
| "rewards/MultiModalAccuracyORM": 0.33333334028720857, |
| "step": 195, |
| "train_speed(iter/s)": 0.037342 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.6333465576172, |
| "epoch": 0.08080808080808081, |
| "grad_norm": 1.503273341785427, |
| "kl": 0.000333404541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.005708768963813782, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667237877845, |
| "reward_std": 0.3603756338357925, |
| "rewards/MultiModalAccuracyORM": 0.26666667237877845, |
| "step": 200, |
| "train_speed(iter/s)": 0.037521 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.68334407806395, |
| "epoch": 0.08282828282828283, |
| "grad_norm": 0.5199716532978094, |
| "kl": 0.0004832744598388672, |
| "learning_rate": 2e-07, |
| "loss": -0.014856468141078948, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.33937130570411683, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 205, |
| "train_speed(iter/s)": 0.037585 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 305.3916732788086, |
| "epoch": 0.08484848484848485, |
| "grad_norm": 2.1287930828371358, |
| "kl": 0.000292205810546875, |
| "learning_rate": 2e-07, |
| "loss": 0.001297689974308014, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.32771685123443606, |
| "rewards/MultiModalAccuracyORM": 0.21666667386889457, |
| "step": 210, |
| "train_speed(iter/s)": 0.037751 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.49167213439944, |
| "epoch": 0.08686868686868687, |
| "grad_norm": 1.7796242872827708, |
| "kl": 0.00042543411254882815, |
| "learning_rate": 2e-07, |
| "loss": -0.006988461315631867, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333656191826, |
| "reward_std": 0.2692273885011673, |
| "rewards/MultiModalAccuracyORM": 0.23333333656191826, |
| "step": 215, |
| "train_speed(iter/s)": 0.037754 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 314.81667442321776, |
| "epoch": 0.08888888888888889, |
| "grad_norm": 1.7638027896241226, |
| "kl": 0.0006679534912109375, |
| "learning_rate": 2e-07, |
| "loss": 0.006352822482585907, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333507180214, |
| "reward_std": 0.25008893609046934, |
| "rewards/MultiModalAccuracyORM": 0.15833333507180214, |
| "step": 220, |
| "train_speed(iter/s)": 0.03785 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 311.2750076293945, |
| "epoch": 0.09090909090909091, |
| "grad_norm": 0.012708836578688367, |
| "kl": 0.00029745101928710935, |
| "learning_rate": 2e-07, |
| "loss": 0.0504034161567688, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000149011613, |
| "reward_std": 0.3164917230606079, |
| "rewards/MultiModalAccuracyORM": 0.30000000149011613, |
| "step": 225, |
| "train_speed(iter/s)": 0.038015 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 265.6750061035156, |
| "epoch": 0.09292929292929293, |
| "grad_norm": 2.064611776487197, |
| "kl": 0.000385284423828125, |
| "learning_rate": 2e-07, |
| "loss": 0.07023286819458008, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000298023225, |
| "reward_std": 0.2650228708982468, |
| "rewards/MultiModalAccuracyORM": 0.15000000298023225, |
| "step": 230, |
| "train_speed(iter/s)": 0.03818 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.5416793823242, |
| "epoch": 0.09494949494949495, |
| "grad_norm": 1.949431436305181, |
| "kl": 0.0002506256103515625, |
| "learning_rate": 2e-07, |
| "loss": 0.01011454164981842, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333469927311, |
| "reward_std": 0.3637147039175034, |
| "rewards/MultiModalAccuracyORM": 0.3333333469927311, |
| "step": 235, |
| "train_speed(iter/s)": 0.03819 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 360.533341217041, |
| "epoch": 0.09696969696969697, |
| "grad_norm": 0.5471178347466235, |
| "kl": 0.0010341405868530273, |
| "learning_rate": 2e-07, |
| "loss": -0.0015352100133895874, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.01666666716337204, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.3511823683977127, |
| "rewards/MultiModalAccuracyORM": 0.28333333805203437, |
| "step": 240, |
| "train_speed(iter/s)": 0.037977 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 336.3000129699707, |
| "epoch": 0.09898989898989899, |
| "grad_norm": 2.3165413137247333, |
| "kl": 0.00027217864990234373, |
| "learning_rate": 2e-07, |
| "loss": 0.0210051491856575, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.32500000968575476, |
| "reward_std": 0.38450039029121397, |
| "rewards/MultiModalAccuracyORM": 0.32500000968575476, |
| "step": 245, |
| "train_speed(iter/s)": 0.037993 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 2.645674704495033, |
| "learning_rate": 2e-07, |
| "loss": -0.03384391665458679, |
| "memory(GiB)": 87.45, |
| "step": 250, |
| "train_speed(iter/s)": 0.038032 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 334.34500762939456, |
| "eval_kl": 0.0004983329772949218, |
| "eval_loss": 0.023834386840462685, |
| "eval_response_clip_ratio": 0.003333333432674408, |
| "eval_reward": 0.24666667267680167, |
| "eval_reward_std": 0.30061395645141603, |
| "eval_rewards/MultiModalAccuracyORM": 0.24666667267680167, |
| "eval_runtime": 585.2435, |
| "eval_samples_per_second": 0.085, |
| "eval_steps_per_second": 0.009, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.79583778381345, |
| "epoch": 0.10303030303030303, |
| "grad_norm": 1.5910045148895993, |
| "kl": 0.0006116151809692383, |
| "learning_rate": 2e-07, |
| "loss": -0.05511324405670166, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667647659776, |
| "reward_std": 0.3701108664274216, |
| "rewards/MultiModalAccuracyORM": 0.34166667647659776, |
| "step": 255, |
| "train_speed(iter/s)": 0.03329 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.85, |
| "epoch": 0.10505050505050505, |
| "grad_norm": 1.8789057522234565, |
| "kl": 0.0006687164306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.08147464394569397, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000037252903, |
| "reward_std": 0.3494287371635437, |
| "rewards/MultiModalAccuracyORM": 0.3500000037252903, |
| "step": 260, |
| "train_speed(iter/s)": 0.033421 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 327.0, |
| "epoch": 0.10707070707070707, |
| "grad_norm": 1.685788699755795, |
| "kl": 0.00030879974365234376, |
| "learning_rate": 2e-07, |
| "loss": 0.0021983295679092406, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333358168602, |
| "reward_std": 0.3010816007852554, |
| "rewards/MultiModalAccuracyORM": 0.2083333358168602, |
| "step": 265, |
| "train_speed(iter/s)": 0.033374 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.5, |
| "epoch": 0.10909090909090909, |
| "grad_norm": 2.9700739773322695, |
| "kl": 0.00040111541748046877, |
| "learning_rate": 2e-07, |
| "loss": -0.004064649343490601, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333730697632, |
| "reward_std": 0.33526621460914613, |
| "rewards/MultiModalAccuracyORM": 0.15833333730697632, |
| "step": 270, |
| "train_speed(iter/s)": 0.033364 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.25, |
| "epoch": 0.1111111111111111, |
| "grad_norm": 1.5939506920216808, |
| "kl": 0.00045032501220703124, |
| "learning_rate": 2e-07, |
| "loss": 0.026332959532737732, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.2526735752820969, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 275, |
| "train_speed(iter/s)": 0.033468 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 496.5, |
| "epoch": 0.11313131313131314, |
| "grad_norm": 1.3058289755881347, |
| "kl": 0.000375831127166748, |
| "learning_rate": 2e-07, |
| "loss": 0.027166426181793213, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500001341104506, |
| "reward_std": 0.37195890247821806, |
| "rewards/MultiModalAccuracyORM": 0.32500001341104506, |
| "step": 280, |
| "train_speed(iter/s)": 0.033383 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.05, |
| "epoch": 0.11515151515151516, |
| "grad_norm": 0.5211592745612927, |
| "kl": 0.0004334449768066406, |
| "learning_rate": 2e-07, |
| "loss": -0.001045474410057068, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000149011612, |
| "reward_std": 0.1808116167783737, |
| "rewards/MultiModalAccuracyORM": 0.22500000149011612, |
| "step": 285, |
| "train_speed(iter/s)": 0.03333 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.25, |
| "epoch": 0.11717171717171718, |
| "grad_norm": 1.9995357461573446, |
| "kl": 0.0005333900451660156, |
| "learning_rate": 2e-07, |
| "loss": -0.00281745046377182, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.3385071337223053, |
| "rewards/MultiModalAccuracyORM": 0.18333333656191825, |
| "step": 290, |
| "train_speed(iter/s)": 0.033413 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.1, |
| "epoch": 0.1191919191919192, |
| "grad_norm": 3.694756818436622, |
| "kl": 0.0010143280029296874, |
| "learning_rate": 2e-07, |
| "loss": -0.003062787652015686, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333879709244, |
| "reward_std": 0.314164274930954, |
| "rewards/MultiModalAccuracyORM": 0.15833333879709244, |
| "step": 295, |
| "train_speed(iter/s)": 0.03345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 393.25, |
| "epoch": 0.12121212121212122, |
| "grad_norm": 1.5577866137872902, |
| "kl": 0.00044269561767578124, |
| "learning_rate": 2e-07, |
| "loss": -0.022827643156051635, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333447575569, |
| "reward_std": 0.3393001317977905, |
| "rewards/MultiModalAccuracyORM": 0.2583333447575569, |
| "step": 300, |
| "train_speed(iter/s)": 0.033327 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 416.25, |
| "epoch": 0.12323232323232323, |
| "grad_norm": 0.8793802822161716, |
| "kl": 0.00045299530029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.039026769995689395, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2333333395421505, |
| "reward_std": 0.33277973234653474, |
| "rewards/MultiModalAccuracyORM": 0.2333333395421505, |
| "step": 305, |
| "train_speed(iter/s)": 0.032887 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 334.3, |
| "epoch": 0.12525252525252525, |
| "grad_norm": 1.9841151826732792, |
| "kl": 0.0006313323974609375, |
| "learning_rate": 2e-07, |
| "loss": -0.006224775314331054, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334252238275, |
| "reward_std": 0.31441850066184995, |
| "rewards/MultiModalAccuracyORM": 0.23333334252238275, |
| "step": 310, |
| "train_speed(iter/s)": 0.032913 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 537.7, |
| "epoch": 0.12727272727272726, |
| "grad_norm": 1.2729907719968943, |
| "kl": 0.0007027626037597656, |
| "learning_rate": 2e-07, |
| "loss": 0.014832744002342224, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.11666666939854622, |
| "reward_std": 0.25891573131084444, |
| "rewards/MultiModalAccuracyORM": 0.11666666939854622, |
| "step": 315, |
| "train_speed(iter/s)": 0.032886 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 282.8, |
| "epoch": 0.1292929292929293, |
| "grad_norm": 0.9148877498687834, |
| "kl": 0.000760650634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.06303757429122925, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.2323044866323471, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 320, |
| "train_speed(iter/s)": 0.032974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 404.8, |
| "epoch": 0.13131313131313133, |
| "grad_norm": 2.00474803214382, |
| "kl": 0.0007790565490722656, |
| "learning_rate": 2e-07, |
| "loss": 0.02660681903362274, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333656191826, |
| "reward_std": 0.2486636757850647, |
| "rewards/MultiModalAccuracyORM": 0.13333333656191826, |
| "step": 325, |
| "train_speed(iter/s)": 0.033068 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 333.8, |
| "epoch": 0.13333333333333333, |
| "grad_norm": 1.6448765146368245, |
| "kl": 0.0005625724792480469, |
| "learning_rate": 2e-07, |
| "loss": 0.024477413296699523, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667237877846, |
| "reward_std": 0.2629852324724197, |
| "rewards/MultiModalAccuracyORM": 0.19166667237877846, |
| "step": 330, |
| "train_speed(iter/s)": 0.0332 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.25, |
| "epoch": 0.13535353535353536, |
| "grad_norm": 2.2001765187520776, |
| "kl": 0.0006697654724121093, |
| "learning_rate": 2e-07, |
| "loss": 0.07480921745300292, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500001043081285, |
| "reward_std": 0.37195890247821806, |
| "rewards/MultiModalAccuracyORM": 0.27500001043081285, |
| "step": 335, |
| "train_speed(iter/s)": 0.033276 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.1, |
| "epoch": 0.13737373737373737, |
| "grad_norm": 0.6836764259374134, |
| "kl": 0.0006744384765625, |
| "learning_rate": 2e-07, |
| "loss": 0.050872421264648436, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 340, |
| "train_speed(iter/s)": 0.033397 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 430.0, |
| "epoch": 0.1393939393939394, |
| "grad_norm": 0.02974363962833146, |
| "kl": 0.0007775306701660156, |
| "learning_rate": 2e-07, |
| "loss": -0.00942653715610504, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000037252903, |
| "reward_std": 0.1933199405670166, |
| "rewards/MultiModalAccuracyORM": 0.1500000037252903, |
| "step": 345, |
| "train_speed(iter/s)": 0.033409 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.15, |
| "epoch": 0.1414141414141414, |
| "grad_norm": 2.153809687333121, |
| "kl": 0.00106048583984375, |
| "learning_rate": 2e-07, |
| "loss": -0.04788823127746582, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333425223827, |
| "reward_std": 0.3908641755580902, |
| "rewards/MultiModalAccuracyORM": 0.3833333425223827, |
| "step": 350, |
| "train_speed(iter/s)": 0.033484 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 273.95, |
| "epoch": 0.14343434343434344, |
| "grad_norm": 2.9003800421035084, |
| "kl": 0.001187896728515625, |
| "learning_rate": 2e-07, |
| "loss": -0.025590839982032775, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000014901161, |
| "reward_std": 0.24484840035438538, |
| "rewards/MultiModalAccuracyORM": 0.1500000014901161, |
| "step": 355, |
| "train_speed(iter/s)": 0.033613 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 258.15, |
| "epoch": 0.14545454545454545, |
| "grad_norm": 1.3041121484800926, |
| "kl": 0.001438140869140625, |
| "learning_rate": 2e-07, |
| "loss": 0.10738253593444824, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.3196970522403717, |
| "rewards/MultiModalAccuracyORM": 0.18333333656191825, |
| "step": 360, |
| "train_speed(iter/s)": 0.033727 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.15, |
| "epoch": 0.14747474747474748, |
| "grad_norm": 0.8360441109730193, |
| "kl": 0.00127105712890625, |
| "learning_rate": 2e-07, |
| "loss": -0.003975853323936462, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.05000000149011612, |
| "reward_std": 0.13558491468429565, |
| "rewards/MultiModalAccuracyORM": 0.05000000149011612, |
| "step": 365, |
| "train_speed(iter/s)": 0.033745 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 296.6, |
| "epoch": 0.1494949494949495, |
| "grad_norm": 2.3979328705343153, |
| "kl": 0.001323699951171875, |
| "learning_rate": 2e-07, |
| "loss": -0.048431962728500366, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.35312480926513673, |
| "rewards/MultiModalAccuracyORM": 0.25000000521540644, |
| "step": 370, |
| "train_speed(iter/s)": 0.033877 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.2, |
| "epoch": 0.15151515151515152, |
| "grad_norm": 1.5241819642025198, |
| "kl": 0.0015224456787109376, |
| "learning_rate": 2e-07, |
| "loss": 0.08156558275222778, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666768491268, |
| "reward_std": 0.30183603167533873, |
| "rewards/MultiModalAccuracyORM": 0.3916666768491268, |
| "step": 375, |
| "train_speed(iter/s)": 0.033941 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 318.7, |
| "epoch": 0.15353535353535352, |
| "grad_norm": 1.4091270455051919, |
| "kl": 0.0014804840087890626, |
| "learning_rate": 2e-07, |
| "loss": -0.005422207713127136, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.29863070249557494, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 380, |
| "train_speed(iter/s)": 0.03401 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 332.4, |
| "epoch": 0.15555555555555556, |
| "grad_norm": 1.7741695775671322, |
| "kl": 0.0017261505126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.013069793581962585, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833334028720853, |
| "reward_std": 0.41791602969169617, |
| "rewards/MultiModalAccuracyORM": 0.35833334028720853, |
| "step": 385, |
| "train_speed(iter/s)": 0.034132 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.15, |
| "epoch": 0.15757575757575756, |
| "grad_norm": 2.1621073881433954, |
| "kl": 0.001946258544921875, |
| "learning_rate": 2e-07, |
| "loss": 0.018825350701808928, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2000000074505806, |
| "reward_std": 0.329024064540863, |
| "rewards/MultiModalAccuracyORM": 0.2000000074505806, |
| "step": 390, |
| "train_speed(iter/s)": 0.034186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 393.7, |
| "epoch": 0.1595959595959596, |
| "grad_norm": 1.8573956206789706, |
| "kl": 0.0013622283935546876, |
| "learning_rate": 2e-07, |
| "loss": 0.01834181547164917, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333380520344, |
| "reward_std": 0.33226497769355773, |
| "rewards/MultiModalAccuracyORM": 0.2583333380520344, |
| "step": 395, |
| "train_speed(iter/s)": 0.034168 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 319.05, |
| "epoch": 0.16161616161616163, |
| "grad_norm": 2.2110728171395646, |
| "kl": 0.0019084930419921875, |
| "learning_rate": 2e-07, |
| "loss": 0.019550779461860658, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000521540642, |
| "reward_std": 0.3008869707584381, |
| "rewards/MultiModalAccuracyORM": 0.20000000521540642, |
| "step": 400, |
| "train_speed(iter/s)": 0.034255 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 263.5, |
| "epoch": 0.16363636363636364, |
| "grad_norm": 2.2884019112467, |
| "kl": 0.00233917236328125, |
| "learning_rate": 2e-07, |
| "loss": 0.00730045884847641, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000819563866, |
| "reward_std": 0.32297652661800386, |
| "rewards/MultiModalAccuracyORM": 0.30000000819563866, |
| "step": 405, |
| "train_speed(iter/s)": 0.034354 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.95, |
| "epoch": 0.16565656565656567, |
| "grad_norm": 3.384921120442682, |
| "kl": 0.001834869384765625, |
| "learning_rate": 2e-07, |
| "loss": 0.02867870032787323, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833334401249884, |
| "reward_std": 0.3604020655155182, |
| "rewards/MultiModalAccuracyORM": 0.30833334401249884, |
| "step": 410, |
| "train_speed(iter/s)": 0.034303 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.85, |
| "epoch": 0.16767676767676767, |
| "grad_norm": 2.578682884841481, |
| "kl": 0.0019824981689453127, |
| "learning_rate": 2e-07, |
| "loss": 0.007520823180675507, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.24105713665485382, |
| "rewards/MultiModalAccuracyORM": 0.20833333656191827, |
| "step": 415, |
| "train_speed(iter/s)": 0.034306 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 223.9, |
| "epoch": 0.1696969696969697, |
| "grad_norm": 2.841135168153006, |
| "kl": 0.003629302978515625, |
| "learning_rate": 2e-07, |
| "loss": 0.008403807878494263, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666746139526, |
| "reward_std": 0.31846399009227755, |
| "rewards/MultiModalAccuracyORM": 0.4166666746139526, |
| "step": 420, |
| "train_speed(iter/s)": 0.034394 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.0, |
| "epoch": 0.1717171717171717, |
| "grad_norm": 1.3952154788825455, |
| "kl": 0.0026947021484375, |
| "learning_rate": 2e-07, |
| "loss": 0.016321972012519836, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3750000104308128, |
| "reward_std": 0.3541358977556229, |
| "rewards/MultiModalAccuracyORM": 0.3750000104308128, |
| "step": 425, |
| "train_speed(iter/s)": 0.034427 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 426.85, |
| "epoch": 0.17373737373737375, |
| "grad_norm": 2.642228792263709, |
| "kl": 0.0035511016845703124, |
| "learning_rate": 2e-07, |
| "loss": 0.04757256805896759, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000059604645, |
| "reward_std": 0.27122943103313446, |
| "rewards/MultiModalAccuracyORM": 0.3000000059604645, |
| "step": 430, |
| "train_speed(iter/s)": 0.034492 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.9, |
| "epoch": 0.17575757575757575, |
| "grad_norm": 2.3061110590781433, |
| "kl": 0.0025909423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.02955559492111206, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000819563863, |
| "reward_std": 0.42218015491962435, |
| "rewards/MultiModalAccuracyORM": 0.27500000819563863, |
| "step": 435, |
| "train_speed(iter/s)": 0.034539 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.9, |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.03487250614691778, |
| "kl": 0.00295562744140625, |
| "learning_rate": 2e-07, |
| "loss": 0.03084596395492554, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333432674407, |
| "reward_std": 0.2657532900571823, |
| "rewards/MultiModalAccuracyORM": 0.15833333432674407, |
| "step": 440, |
| "train_speed(iter/s)": 0.034613 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.5, |
| "epoch": 0.1797979797979798, |
| "grad_norm": 1.8186333166660678, |
| "kl": 0.0029296875, |
| "learning_rate": 2e-07, |
| "loss": -0.008677978813648225, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333358168602, |
| "reward_std": 0.23004821836948394, |
| "rewards/MultiModalAccuracyORM": 0.3083333358168602, |
| "step": 445, |
| "train_speed(iter/s)": 0.034594 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 277.7, |
| "epoch": 0.18181818181818182, |
| "grad_norm": 1.5483724144717876, |
| "kl": 0.003802490234375, |
| "learning_rate": 2e-07, |
| "loss": -0.010931169986724854, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667461395264, |
| "reward_std": 0.36794900298118594, |
| "rewards/MultiModalAccuracyORM": 0.21666667461395264, |
| "step": 450, |
| "train_speed(iter/s)": 0.034617 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 442.1, |
| "epoch": 0.18383838383838383, |
| "grad_norm": 0.8802169915779423, |
| "kl": 0.00302734375, |
| "learning_rate": 2e-07, |
| "loss": -0.04651644229888916, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000596046448, |
| "reward_std": 0.2963056802749634, |
| "rewards/MultiModalAccuracyORM": 0.15000000596046448, |
| "step": 455, |
| "train_speed(iter/s)": 0.034674 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 329.5, |
| "epoch": 0.18585858585858586, |
| "grad_norm": 1.6049021687383316, |
| "kl": 0.00660247802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.008616116642951966, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.25741389989852903, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 460, |
| "train_speed(iter/s)": 0.034754 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 316.5, |
| "epoch": 0.18787878787878787, |
| "grad_norm": 2.893110887441056, |
| "kl": 0.002629852294921875, |
| "learning_rate": 2e-07, |
| "loss": 0.0028022266924381256, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000001415610315, |
| "reward_std": 0.4707459330558777, |
| "rewards/MultiModalAccuracyORM": 0.40000001415610315, |
| "step": 465, |
| "train_speed(iter/s)": 0.034821 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.5, |
| "epoch": 0.1898989898989899, |
| "grad_norm": 2.1102869760511584, |
| "kl": 0.0035003662109375, |
| "learning_rate": 2e-07, |
| "loss": 0.0047733023762702945, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000670552254, |
| "reward_std": 0.3082119345664978, |
| "rewards/MultiModalAccuracyORM": 0.20000000670552254, |
| "step": 470, |
| "train_speed(iter/s)": 0.034862 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 312.1, |
| "epoch": 0.1919191919191919, |
| "grad_norm": 2.403767582762209, |
| "kl": 0.00347442626953125, |
| "learning_rate": 2e-07, |
| "loss": 0.0637534499168396, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5083333484828472, |
| "reward_std": 0.34557787179946897, |
| "rewards/MultiModalAccuracyORM": 0.5083333484828472, |
| "step": 475, |
| "train_speed(iter/s)": 0.03495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 348.65, |
| "epoch": 0.19393939393939394, |
| "grad_norm": 0.6979791277265925, |
| "kl": 0.00365142822265625, |
| "learning_rate": 2e-07, |
| "loss": -0.04180996119976044, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667088866236, |
| "reward_std": 0.32826719582080843, |
| "rewards/MultiModalAccuracyORM": 0.26666667088866236, |
| "step": 480, |
| "train_speed(iter/s)": 0.034951 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 269.85, |
| "epoch": 0.19595959595959597, |
| "grad_norm": 0.0525932465492366, |
| "kl": 0.00377197265625, |
| "learning_rate": 2e-07, |
| "loss": -0.014869007468223571, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000052154064, |
| "reward_std": 0.20967912971973418, |
| "rewards/MultiModalAccuracyORM": 0.4000000052154064, |
| "step": 485, |
| "train_speed(iter/s)": 0.035021 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.1, |
| "epoch": 0.19797979797979798, |
| "grad_norm": 1.6281647114218305, |
| "kl": 0.004177093505859375, |
| "learning_rate": 2e-07, |
| "loss": 0.015925824642181396, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.3227578908205032, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 490, |
| "train_speed(iter/s)": 0.035088 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 329.75, |
| "epoch": 0.2, |
| "grad_norm": 1.984961473458151, |
| "kl": 0.00326995849609375, |
| "learning_rate": 2e-07, |
| "loss": -0.0037449508905410766, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000819563863, |
| "reward_std": 0.2855509877204895, |
| "rewards/MultiModalAccuracyORM": 0.27500000819563863, |
| "step": 495, |
| "train_speed(iter/s)": 0.035113 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 0.6734714455829673, |
| "learning_rate": 2e-07, |
| "loss": -0.013085539638996124, |
| "memory(GiB)": 87.45, |
| "step": 500, |
| "train_speed(iter/s)": 0.035182 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 363.1450085449219, |
| "eval_kl": 0.003147125244140625, |
| "eval_loss": 0.024374496191740036, |
| "eval_response_clip_ratio": 0.003333333432674408, |
| "eval_reward": 0.26666667237877845, |
| "eval_reward_std": 0.28797652542591096, |
| "eval_rewards/MultiModalAccuracyORM": 0.26666667237877845, |
| "eval_runtime": 597.4581, |
| "eval_samples_per_second": 0.084, |
| "eval_steps_per_second": 0.008, |
| "step": 500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.4, |
| "epoch": 0.20404040404040405, |
| "grad_norm": 2.0097245676314053, |
| "kl": 0.002962684631347656, |
| "learning_rate": 2e-07, |
| "loss": 0.008341678977012634, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22916666902601718, |
| "reward_std": 0.28844616413116453, |
| "rewards/MultiModalAccuracyORM": 0.22916666902601718, |
| "step": 505, |
| "train_speed(iter/s)": 0.033026 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 478.15, |
| "epoch": 0.20606060606060606, |
| "grad_norm": 0.04671524557136776, |
| "kl": 0.004395294189453125, |
| "learning_rate": 2e-07, |
| "loss": 0.019101715087890624, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.22704698145389557, |
| "rewards/MultiModalAccuracyORM": 0.20833333656191827, |
| "step": 510, |
| "train_speed(iter/s)": 0.033029 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 390.65, |
| "epoch": 0.2080808080808081, |
| "grad_norm": 1.7656462373703843, |
| "kl": 0.003029632568359375, |
| "learning_rate": 2e-07, |
| "loss": 0.04230659604072571, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.248858305811882, |
| "rewards/MultiModalAccuracyORM": 0.22500000521540642, |
| "step": 515, |
| "train_speed(iter/s)": 0.032925 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 313.8, |
| "epoch": 0.2101010101010101, |
| "grad_norm": 1.2593604182587, |
| "kl": 0.0040802001953125, |
| "learning_rate": 2e-07, |
| "loss": -0.0020169973373413085, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4583333432674408, |
| "reward_std": 0.4390155434608459, |
| "rewards/MultiModalAccuracyORM": 0.4583333432674408, |
| "step": 520, |
| "train_speed(iter/s)": 0.032885 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.7, |
| "epoch": 0.21212121212121213, |
| "grad_norm": 10.635733115288671, |
| "kl": 0.006873321533203125, |
| "learning_rate": 2e-07, |
| "loss": 0.013639546930789948, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000447034837, |
| "reward_std": 0.29108133912086487, |
| "rewards/MultiModalAccuracyORM": 0.25000000447034837, |
| "step": 525, |
| "train_speed(iter/s)": 0.032731 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 335.65, |
| "epoch": 0.21414141414141413, |
| "grad_norm": 2.2605304578434664, |
| "kl": 0.00481109619140625, |
| "learning_rate": 2e-07, |
| "loss": 0.029361778497695924, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667610406877, |
| "reward_std": 0.3948740750551224, |
| "rewards/MultiModalAccuracyORM": 0.29166667610406877, |
| "step": 530, |
| "train_speed(iter/s)": 0.032671 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 300.95, |
| "epoch": 0.21616161616161617, |
| "grad_norm": 3.233553935601456, |
| "kl": 0.005239105224609375, |
| "learning_rate": 2e-07, |
| "loss": -0.02358839809894562, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000670552254, |
| "reward_std": 0.39305841624736787, |
| "rewards/MultiModalAccuracyORM": 0.32500000670552254, |
| "step": 535, |
| "train_speed(iter/s)": 0.032679 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.15, |
| "epoch": 0.21818181818181817, |
| "grad_norm": 1.4435208932830024, |
| "kl": 0.0038543701171875, |
| "learning_rate": 2e-07, |
| "loss": 0.012015002965927123, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166666939854622, |
| "reward_std": 0.2184889554977417, |
| "rewards/MultiModalAccuracyORM": 0.14166666939854622, |
| "step": 540, |
| "train_speed(iter/s)": 0.032663 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 280.7, |
| "epoch": 0.2202020202020202, |
| "grad_norm": 2.124111886424564, |
| "kl": 0.00633544921875, |
| "learning_rate": 2e-07, |
| "loss": 0.016453295946121216, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.43333334773778914, |
| "reward_std": 0.40082641541957853, |
| "rewards/MultiModalAccuracyORM": 0.43333334773778914, |
| "step": 545, |
| "train_speed(iter/s)": 0.032702 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 405.4, |
| "epoch": 0.2222222222222222, |
| "grad_norm": 2.528384017814939, |
| "kl": 0.004555511474609375, |
| "learning_rate": 2e-07, |
| "loss": -0.013006833195686341, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2333333410322666, |
| "reward_std": 0.3478317141532898, |
| "rewards/MultiModalAccuracyORM": 0.2333333410322666, |
| "step": 550, |
| "train_speed(iter/s)": 0.032503 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.4, |
| "epoch": 0.22424242424242424, |
| "grad_norm": 2.5915001907307977, |
| "kl": 0.00524749755859375, |
| "learning_rate": 2e-07, |
| "loss": 0.02111098766326904, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666753590107, |
| "reward_std": 0.3644451290369034, |
| "rewards/MultiModalAccuracyORM": 0.2916666753590107, |
| "step": 555, |
| "train_speed(iter/s)": 0.032469 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.65, |
| "epoch": 0.22626262626262628, |
| "grad_norm": 1.5712795723400375, |
| "kl": 0.004864501953125, |
| "learning_rate": 2e-07, |
| "loss": 0.06747217178344726, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.30639870166778566, |
| "rewards/MultiModalAccuracyORM": 0.21666667386889457, |
| "step": 560, |
| "train_speed(iter/s)": 0.032374 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.25, |
| "epoch": 0.22828282828282828, |
| "grad_norm": 2.1872516406963483, |
| "kl": 0.0059844970703125, |
| "learning_rate": 2e-07, |
| "loss": -0.01907222718000412, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.27402731478214265, |
| "rewards/MultiModalAccuracyORM": 0.18333333656191825, |
| "step": 565, |
| "train_speed(iter/s)": 0.03236 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 252.8, |
| "epoch": 0.23030303030303031, |
| "grad_norm": 1.9388301349526922, |
| "kl": 0.00828857421875, |
| "learning_rate": 2e-07, |
| "loss": 0.0710361123085022, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333507180214, |
| "reward_std": 0.26670235097408296, |
| "rewards/MultiModalAccuracyORM": 0.28333333507180214, |
| "step": 570, |
| "train_speed(iter/s)": 0.032388 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 475.4, |
| "epoch": 0.23232323232323232, |
| "grad_norm": 2.0643763651689424, |
| "kl": 0.0043544769287109375, |
| "learning_rate": 2e-07, |
| "loss": 0.038624811172485354, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667386889455, |
| "reward_std": 0.38400964736938475, |
| "rewards/MultiModalAccuracyORM": 0.29166667386889455, |
| "step": 575, |
| "train_speed(iter/s)": 0.032305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.45, |
| "epoch": 0.23434343434343435, |
| "grad_norm": 2.5185952971698566, |
| "kl": 0.00495452880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.02923307418823242, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667461395264, |
| "reward_std": 0.35012357234954833, |
| "rewards/MultiModalAccuracyORM": 0.36666667461395264, |
| "step": 580, |
| "train_speed(iter/s)": 0.032206 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.9, |
| "epoch": 0.23636363636363636, |
| "grad_norm": 1.8128917450324007, |
| "kl": 0.0055450439453125, |
| "learning_rate": 2e-07, |
| "loss": 0.013245610892772675, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333879709244, |
| "reward_std": 0.23710441291332246, |
| "rewards/MultiModalAccuracyORM": 0.28333333879709244, |
| "step": 585, |
| "train_speed(iter/s)": 0.032276 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 355.4, |
| "epoch": 0.2383838383838384, |
| "grad_norm": 4.329439973170006, |
| "kl": 0.00757293701171875, |
| "learning_rate": 2e-07, |
| "loss": -0.0028860807418823243, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000819563867, |
| "reward_std": 0.30661733746528624, |
| "rewards/MultiModalAccuracyORM": 0.25000000819563867, |
| "step": 590, |
| "train_speed(iter/s)": 0.032341 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.8, |
| "epoch": 0.2404040404040404, |
| "grad_norm": 1.8156019329792383, |
| "kl": 0.005291748046875, |
| "learning_rate": 2e-07, |
| "loss": -0.004809608310461044, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666753590107, |
| "reward_std": 0.40967183113098143, |
| "rewards/MultiModalAccuracyORM": 0.4166666753590107, |
| "step": 595, |
| "train_speed(iter/s)": 0.032425 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 518.65, |
| "epoch": 0.24242424242424243, |
| "grad_norm": 1.6812944635615767, |
| "kl": 0.0045440673828125, |
| "learning_rate": 2e-07, |
| "loss": 0.0016623079776763917, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667312383653, |
| "reward_std": 0.35134140253067014, |
| "rewards/MultiModalAccuracyORM": 0.21666667312383653, |
| "step": 600, |
| "train_speed(iter/s)": 0.032411 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.4, |
| "epoch": 0.24444444444444444, |
| "grad_norm": 2.089820121690527, |
| "kl": 0.00710906982421875, |
| "learning_rate": 2e-07, |
| "loss": -0.028999322652816774, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.33552044034004214, |
| "rewards/MultiModalAccuracyORM": 0.2666666738688946, |
| "step": 605, |
| "train_speed(iter/s)": 0.032494 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 246.75, |
| "epoch": 0.24646464646464647, |
| "grad_norm": 2.728310100204588, |
| "kl": 0.00543060302734375, |
| "learning_rate": 2e-07, |
| "loss": 0.03924176394939423, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.27078639566898344, |
| "rewards/MultiModalAccuracyORM": 0.1916666716337204, |
| "step": 610, |
| "train_speed(iter/s)": 0.032569 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.2, |
| "epoch": 0.24848484848484848, |
| "grad_norm": 1.3175052417192106, |
| "kl": 0.00468902587890625, |
| "learning_rate": 2e-07, |
| "loss": 0.038245481252670285, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000067055225, |
| "reward_std": 0.4048719048500061, |
| "rewards/MultiModalAccuracyORM": 0.2500000067055225, |
| "step": 615, |
| "train_speed(iter/s)": 0.032501 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.7, |
| "epoch": 0.2505050505050505, |
| "grad_norm": 1.9529912685373527, |
| "kl": 0.00550537109375, |
| "learning_rate": 2e-07, |
| "loss": 0.011770330369472504, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.2895964771509171, |
| "rewards/MultiModalAccuracyORM": 0.3916666753590107, |
| "step": 620, |
| "train_speed(iter/s)": 0.032477 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.75, |
| "epoch": 0.25252525252525254, |
| "grad_norm": 0.05113023046556139, |
| "kl": 0.00566864013671875, |
| "learning_rate": 2e-07, |
| "loss": 0.01361861228942871, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2333333358168602, |
| "reward_std": 0.275274920463562, |
| "rewards/MultiModalAccuracyORM": 0.2333333358168602, |
| "step": 625, |
| "train_speed(iter/s)": 0.032463 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.15, |
| "epoch": 0.2545454545454545, |
| "grad_norm": 2.556743977258531, |
| "kl": 0.005108642578125, |
| "learning_rate": 2e-07, |
| "loss": 0.014950770139694213, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.34713688492774963, |
| "rewards/MultiModalAccuracyORM": 0.3333333425223827, |
| "step": 630, |
| "train_speed(iter/s)": 0.032484 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.15, |
| "epoch": 0.25656565656565655, |
| "grad_norm": 2.2423462644187624, |
| "kl": 0.004283905029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.008650130033493042, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333395421505, |
| "reward_std": 0.28752902448177337, |
| "rewards/MultiModalAccuracyORM": 0.1833333395421505, |
| "step": 635, |
| "train_speed(iter/s)": 0.032416 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.15, |
| "epoch": 0.2585858585858586, |
| "grad_norm": 2.7318256637713327, |
| "kl": 0.0051483154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.021026265621185303, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000111758709, |
| "reward_std": 0.29385479390621183, |
| "rewards/MultiModalAccuracyORM": 0.2500000111758709, |
| "step": 640, |
| "train_speed(iter/s)": 0.032398 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.9, |
| "epoch": 0.2606060606060606, |
| "grad_norm": 0.04170508484645814, |
| "kl": 0.00531463623046875, |
| "learning_rate": 2e-07, |
| "loss": -0.04355872869491577, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000819563867, |
| "reward_std": 0.3089067697525024, |
| "rewards/MultiModalAccuracyORM": 0.25000000819563867, |
| "step": 645, |
| "train_speed(iter/s)": 0.032375 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 407.55, |
| "epoch": 0.26262626262626265, |
| "grad_norm": 1.2451580073322923, |
| "kl": 0.003839111328125, |
| "learning_rate": 2e-07, |
| "loss": 0.00021180734038352966, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667461395266, |
| "reward_std": 0.2676923930644989, |
| "rewards/MultiModalAccuracyORM": 0.26666667461395266, |
| "step": 650, |
| "train_speed(iter/s)": 0.032327 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 432.75, |
| "epoch": 0.26464646464646463, |
| "grad_norm": 1.9808716749773743, |
| "kl": 0.00391082763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.026480630040168762, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000670552253, |
| "reward_std": 0.2817953139543533, |
| "rewards/MultiModalAccuracyORM": 0.22500000670552253, |
| "step": 655, |
| "train_speed(iter/s)": 0.032322 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 463.85, |
| "epoch": 0.26666666666666666, |
| "grad_norm": 1.1399233339835215, |
| "kl": 0.004100799560546875, |
| "learning_rate": 2e-07, |
| "loss": -0.02441052794456482, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333380520344, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.1833333380520344, |
| "step": 660, |
| "train_speed(iter/s)": 0.032385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 254.3, |
| "epoch": 0.2686868686868687, |
| "grad_norm": 2.4222117834215964, |
| "kl": 0.0057952880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.01856023073196411, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2166666716337204, |
| "reward_std": 0.3348231792449951, |
| "rewards/MultiModalAccuracyORM": 0.2166666716337204, |
| "step": 665, |
| "train_speed(iter/s)": 0.032448 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 360.55, |
| "epoch": 0.27070707070707073, |
| "grad_norm": 2.596880019981878, |
| "kl": 0.0034820556640625, |
| "learning_rate": 2e-07, |
| "loss": -0.004870015382766724, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000001192092893, |
| "reward_std": 0.3786772578954697, |
| "rewards/MultiModalAccuracyORM": 0.40000001192092893, |
| "step": 670, |
| "train_speed(iter/s)": 0.032507 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 547.2, |
| "epoch": 0.2727272727272727, |
| "grad_norm": 1.261892143617939, |
| "kl": 0.003546142578125, |
| "learning_rate": 2e-07, |
| "loss": 0.018378911912441252, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.25365822613239286, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 675, |
| "train_speed(iter/s)": 0.032509 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 389.15, |
| "epoch": 0.27474747474747474, |
| "grad_norm": 1.5125590979703638, |
| "kl": 0.00487823486328125, |
| "learning_rate": 2e-07, |
| "loss": -0.004463189840316772, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000596046446, |
| "reward_std": 0.2488823115825653, |
| "rewards/MultiModalAccuracyORM": 0.25000000596046446, |
| "step": 680, |
| "train_speed(iter/s)": 0.03256 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 461.15, |
| "epoch": 0.2767676767676768, |
| "grad_norm": 0.0206379809755319, |
| "kl": 0.00426177978515625, |
| "learning_rate": 2e-07, |
| "loss": 0.021875476837158202, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666666865348815, |
| "reward_std": 0.26496326327323916, |
| "rewards/MultiModalAccuracyORM": 0.11666666865348815, |
| "step": 685, |
| "train_speed(iter/s)": 0.032514 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 333.5, |
| "epoch": 0.2787878787878788, |
| "grad_norm": 2.5475669372401737, |
| "kl": 0.00420379638671875, |
| "learning_rate": 2e-07, |
| "loss": 0.004043090343475342, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000447034836, |
| "reward_std": 0.30210480093955994, |
| "rewards/MultiModalAccuracyORM": 0.15000000447034836, |
| "step": 690, |
| "train_speed(iter/s)": 0.032521 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 416.7, |
| "epoch": 0.2808080808080808, |
| "grad_norm": 1.5500150159182102, |
| "kl": 0.00518798828125, |
| "learning_rate": 2e-07, |
| "loss": -0.023865307867527007, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333343267441, |
| "reward_std": 0.1683032989501953, |
| "rewards/MultiModalAccuracyORM": 0.1833333343267441, |
| "step": 695, |
| "train_speed(iter/s)": 0.032416 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 372.35, |
| "epoch": 0.2828282828282828, |
| "grad_norm": 1.9962407432487237, |
| "kl": 0.005457305908203125, |
| "learning_rate": 2e-07, |
| "loss": -0.028327393531799316, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.2250000037252903, |
| "reward_std": 0.2099333554506302, |
| "rewards/MultiModalAccuracyORM": 0.2250000037252903, |
| "step": 700, |
| "train_speed(iter/s)": 0.032361 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 313.4, |
| "epoch": 0.28484848484848485, |
| "grad_norm": 1.6074003724487615, |
| "kl": 0.00528717041015625, |
| "learning_rate": 2e-07, |
| "loss": 0.014926820993423462, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333410322666, |
| "reward_std": 0.27223809361457824, |
| "rewards/MultiModalAccuracyORM": 0.3083333410322666, |
| "step": 705, |
| "train_speed(iter/s)": 0.032343 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.5, |
| "epoch": 0.2868686868686869, |
| "grad_norm": 1.6995014935336248, |
| "kl": 0.0051483154296875, |
| "learning_rate": 2e-07, |
| "loss": -0.019916635751724244, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.24885829985141755, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 710, |
| "train_speed(iter/s)": 0.032338 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.3, |
| "epoch": 0.28888888888888886, |
| "grad_norm": 2.5308810289000134, |
| "kl": 0.00496978759765625, |
| "learning_rate": 2e-07, |
| "loss": 0.01712719202041626, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000447034834, |
| "reward_std": 0.3906455457210541, |
| "rewards/MultiModalAccuracyORM": 0.40000000447034834, |
| "step": 715, |
| "train_speed(iter/s)": 0.03227 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 464.2, |
| "epoch": 0.2909090909090909, |
| "grad_norm": 3.1179537828506865, |
| "kl": 0.00511016845703125, |
| "learning_rate": 2e-07, |
| "loss": -0.0032517150044441222, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500001043081283, |
| "reward_std": 0.3038526177406311, |
| "rewards/MultiModalAccuracyORM": 0.22500001043081283, |
| "step": 720, |
| "train_speed(iter/s)": 0.032189 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 339.9, |
| "epoch": 0.29292929292929293, |
| "grad_norm": 1.3264200657485663, |
| "kl": 0.0060546875, |
| "learning_rate": 2e-07, |
| "loss": 0.005654716491699218, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000059604645, |
| "reward_std": 0.2988493382930756, |
| "rewards/MultiModalAccuracyORM": 0.2250000059604645, |
| "step": 725, |
| "train_speed(iter/s)": 0.032186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 471.65, |
| "epoch": 0.29494949494949496, |
| "grad_norm": 0.5240042260688945, |
| "kl": 0.005621719360351563, |
| "learning_rate": 2e-07, |
| "loss": 0.010572614520788193, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1750000037252903, |
| "reward_std": 0.2945852130651474, |
| "rewards/MultiModalAccuracyORM": 0.1750000037252903, |
| "step": 730, |
| "train_speed(iter/s)": 0.032129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 445.2, |
| "epoch": 0.296969696969697, |
| "grad_norm": 2.049661779713074, |
| "kl": 0.00519866943359375, |
| "learning_rate": 2e-07, |
| "loss": 0.022058649361133574, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333410322666, |
| "reward_std": 0.38726511001586916, |
| "rewards/MultiModalAccuracyORM": 0.2833333410322666, |
| "step": 735, |
| "train_speed(iter/s)": 0.032089 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 391.25, |
| "epoch": 0.298989898989899, |
| "grad_norm": 0.962602559613357, |
| "kl": 0.0046844482421875, |
| "learning_rate": 2e-07, |
| "loss": -0.0028517723083496095, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.23328913748264313, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 740, |
| "train_speed(iter/s)": 0.032072 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.5, |
| "epoch": 0.301010101010101, |
| "grad_norm": 2.0529334461639337, |
| "kl": 0.00500030517578125, |
| "learning_rate": 2e-07, |
| "loss": 0.0314439594745636, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334177732468, |
| "reward_std": 0.3212204694747925, |
| "rewards/MultiModalAccuracyORM": 0.23333334177732468, |
| "step": 745, |
| "train_speed(iter/s)": 0.032037 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 1.3580773911974338, |
| "learning_rate": 2e-07, |
| "loss": -0.007335931062698364, |
| "memory(GiB)": 87.45, |
| "step": 750, |
| "train_speed(iter/s)": 0.032014 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 352.49667709350587, |
| "eval_kl": 0.00640625, |
| "eval_loss": 0.002320815809071064, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.2716666729748249, |
| "eval_reward_std": 0.33371097803115846, |
| "eval_rewards/MultiModalAccuracyORM": 0.2716666729748249, |
| "eval_runtime": 876.1057, |
| "eval_samples_per_second": 0.057, |
| "eval_steps_per_second": 0.006, |
| "step": 750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.55, |
| "epoch": 0.30505050505050507, |
| "grad_norm": 2.1426610619194815, |
| "kl": 0.00631256103515625, |
| "learning_rate": 2e-07, |
| "loss": -0.040098315477371214, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333656191826, |
| "reward_std": 0.22312387079000473, |
| "rewards/MultiModalAccuracyORM": 0.13333333656191826, |
| "step": 755, |
| "train_speed(iter/s)": 0.029206 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 468.4, |
| "epoch": 0.30707070707070705, |
| "grad_norm": 0.8717248302301553, |
| "kl": 0.00636749267578125, |
| "learning_rate": 2e-07, |
| "loss": 0.015009742975234986, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333358168602, |
| "reward_std": 0.2940850019454956, |
| "rewards/MultiModalAccuracyORM": 0.2833333358168602, |
| "step": 760, |
| "train_speed(iter/s)": 0.029162 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.8, |
| "epoch": 0.3090909090909091, |
| "grad_norm": 2.4403464428155925, |
| "kl": 0.0062957763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.019652032852172853, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000001043081284, |
| "reward_std": 0.3222792655229568, |
| "rewards/MultiModalAccuracyORM": 0.45000001043081284, |
| "step": 765, |
| "train_speed(iter/s)": 0.029211 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 448.35, |
| "epoch": 0.3111111111111111, |
| "grad_norm": 1.6980769345505524, |
| "kl": 0.0074066162109375, |
| "learning_rate": 2e-07, |
| "loss": 0.018609333038330077, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667833924295, |
| "reward_std": 0.4026396483182907, |
| "rewards/MultiModalAccuracyORM": 0.31666667833924295, |
| "step": 770, |
| "train_speed(iter/s)": 0.029164 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.35, |
| "epoch": 0.31313131313131315, |
| "grad_norm": 1.4345330108808567, |
| "kl": 0.00540924072265625, |
| "learning_rate": 2e-07, |
| "loss": 0.034766983985900876, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.26666667088866236, |
| "reward_std": 0.3167103588581085, |
| "rewards/MultiModalAccuracyORM": 0.26666667088866236, |
| "step": 775, |
| "train_speed(iter/s)": 0.029127 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 441.5, |
| "epoch": 0.3151515151515151, |
| "grad_norm": 1.0920815430357467, |
| "kl": 0.0054931640625, |
| "learning_rate": 2e-07, |
| "loss": -7.512569427490235e-05, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333656191826, |
| "reward_std": 0.22400068640708923, |
| "rewards/MultiModalAccuracyORM": 0.10833333656191826, |
| "step": 780, |
| "train_speed(iter/s)": 0.029106 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 435.7, |
| "epoch": 0.31717171717171716, |
| "grad_norm": 1.3732918705207908, |
| "kl": 0.00477752685546875, |
| "learning_rate": 2e-07, |
| "loss": 0.015651023387908934, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666679084301, |
| "reward_std": 0.23479096889495848, |
| "rewards/MultiModalAccuracyORM": 0.2416666679084301, |
| "step": 785, |
| "train_speed(iter/s)": 0.029052 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 453.35, |
| "epoch": 0.3191919191919192, |
| "grad_norm": 2.1057593122144005, |
| "kl": 0.00804595947265625, |
| "learning_rate": 2e-07, |
| "loss": -0.0006304442882537842, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.3619014710187912, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 790, |
| "train_speed(iter/s)": 0.029057 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.2, |
| "epoch": 0.3212121212121212, |
| "grad_norm": 2.3354800713445654, |
| "kl": 0.0078216552734375, |
| "learning_rate": 2e-07, |
| "loss": 0.0310418963432312, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667759418486, |
| "reward_std": 0.40556674003601073, |
| "rewards/MultiModalAccuracyORM": 0.36666667759418486, |
| "step": 795, |
| "train_speed(iter/s)": 0.029005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.45, |
| "epoch": 0.32323232323232326, |
| "grad_norm": 2.4825567652901444, |
| "kl": 0.0077880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.021943604946136473, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666761040688, |
| "reward_std": 0.3470627337694168, |
| "rewards/MultiModalAccuracyORM": 0.3666666761040688, |
| "step": 800, |
| "train_speed(iter/s)": 0.028978 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.9, |
| "epoch": 0.32525252525252524, |
| "grad_norm": 3.589672291824819, |
| "kl": 0.00778350830078125, |
| "learning_rate": 2e-07, |
| "loss": 0.008873769640922546, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.44790194034576414, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 805, |
| "train_speed(iter/s)": 0.028982 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.2, |
| "epoch": 0.32727272727272727, |
| "grad_norm": 2.1262920297539925, |
| "kl": 0.0073883056640625, |
| "learning_rate": 2e-07, |
| "loss": -0.04254024624824524, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333447575569, |
| "reward_std": 0.3589002341032028, |
| "rewards/MultiModalAccuracyORM": 0.2833333447575569, |
| "step": 810, |
| "train_speed(iter/s)": 0.029003 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.9, |
| "epoch": 0.3292929292929293, |
| "grad_norm": 2.6338345195445965, |
| "kl": 0.0073974609375, |
| "learning_rate": 2e-07, |
| "loss": 0.008789122104644775, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333879709245, |
| "reward_std": 0.3563657283782959, |
| "rewards/MultiModalAccuracyORM": 0.35833333879709245, |
| "step": 815, |
| "train_speed(iter/s)": 0.028993 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.85, |
| "epoch": 0.33131313131313134, |
| "grad_norm": 2.540831778543349, |
| "kl": 0.0085357666015625, |
| "learning_rate": 2e-07, |
| "loss": 0.0017573148012161254, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.33755565285682676, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 820, |
| "train_speed(iter/s)": 0.02904 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.3, |
| "epoch": 0.3333333333333333, |
| "grad_norm": 2.280326105508933, |
| "kl": 0.011834716796875, |
| "learning_rate": 2e-07, |
| "loss": -0.016002975404262543, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000596046447, |
| "reward_std": 0.24662604331970214, |
| "rewards/MultiModalAccuracyORM": 0.17500000596046447, |
| "step": 825, |
| "train_speed(iter/s)": 0.02907 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 334.05, |
| "epoch": 0.33535353535353535, |
| "grad_norm": 1.64256260222623, |
| "kl": 0.00889892578125, |
| "learning_rate": 2e-07, |
| "loss": -0.008859094977378846, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.3164917230606079, |
| "rewards/MultiModalAccuracyORM": 0.20000000149011612, |
| "step": 830, |
| "train_speed(iter/s)": 0.029109 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.15, |
| "epoch": 0.3373737373737374, |
| "grad_norm": 0.09646041600368084, |
| "kl": 0.0067840576171875, |
| "learning_rate": 2e-07, |
| "loss": 0.02341327965259552, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000096857548, |
| "reward_std": 0.3184880018234253, |
| "rewards/MultiModalAccuracyORM": 0.2750000096857548, |
| "step": 835, |
| "train_speed(iter/s)": 0.028908 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 434.35, |
| "epoch": 0.3393939393939394, |
| "grad_norm": 0.886588445568382, |
| "kl": 0.0066741943359375, |
| "learning_rate": 2e-07, |
| "loss": 0.011455638706684113, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667312383653, |
| "reward_std": 0.3142238825559616, |
| "rewards/MultiModalAccuracyORM": 0.34166667312383653, |
| "step": 840, |
| "train_speed(iter/s)": 0.02878 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 448.75, |
| "epoch": 0.3414141414141414, |
| "grad_norm": 0.0732846157739433, |
| "kl": 0.00753173828125, |
| "learning_rate": 2e-07, |
| "loss": 0.010994693636894226, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333507180213, |
| "reward_std": 0.19786564111709595, |
| "rewards/MultiModalAccuracyORM": 0.20833333507180213, |
| "step": 845, |
| "train_speed(iter/s)": 0.028759 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.0, |
| "epoch": 0.3434343434343434, |
| "grad_norm": 2.016101823545884, |
| "kl": 0.00940399169921875, |
| "learning_rate": 2e-07, |
| "loss": 0.0015551522374153137, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.37221312820911406, |
| "rewards/MultiModalAccuracyORM": 0.21666667386889457, |
| "step": 850, |
| "train_speed(iter/s)": 0.028759 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.75, |
| "epoch": 0.34545454545454546, |
| "grad_norm": 1.4804689213107514, |
| "kl": 0.0074249267578125, |
| "learning_rate": 2e-07, |
| "loss": 0.008444362878799438, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000521540644, |
| "reward_std": 0.33937130570411683, |
| "rewards/MultiModalAccuracyORM": 0.37500000521540644, |
| "step": 855, |
| "train_speed(iter/s)": 0.02876 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 317.1, |
| "epoch": 0.3474747474747475, |
| "grad_norm": 2.368905519842238, |
| "kl": 0.008038330078125, |
| "learning_rate": 2e-07, |
| "loss": 0.026756054162979125, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000111758709, |
| "reward_std": 0.44455128610134126, |
| "rewards/MultiModalAccuracyORM": 0.3500000111758709, |
| "step": 860, |
| "train_speed(iter/s)": 0.028787 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 276.85, |
| "epoch": 0.34949494949494947, |
| "grad_norm": 2.3043935598394203, |
| "kl": 0.0070343017578125, |
| "learning_rate": 2e-07, |
| "loss": 0.059600555896759035, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333432674408, |
| "reward_std": 0.3885723173618317, |
| "rewards/MultiModalAccuracyORM": 0.2833333432674408, |
| "step": 865, |
| "train_speed(iter/s)": 0.028814 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 417.95, |
| "epoch": 0.3515151515151515, |
| "grad_norm": 1.9471040249213727, |
| "kl": 0.0069305419921875, |
| "learning_rate": 2e-07, |
| "loss": 0.028457581996917725, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000596046447, |
| "reward_std": 0.2940494120121002, |
| "rewards/MultiModalAccuracyORM": 0.17500000596046447, |
| "step": 870, |
| "train_speed(iter/s)": 0.028708 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.5, |
| "epoch": 0.35353535353535354, |
| "grad_norm": 2.196604109706096, |
| "kl": 0.0058319091796875, |
| "learning_rate": 2e-07, |
| "loss": 0.04532061517238617, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666775941849, |
| "reward_std": 0.42524099349975586, |
| "rewards/MultiModalAccuracyORM": 0.3916666775941849, |
| "step": 875, |
| "train_speed(iter/s)": 0.028627 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 349.05, |
| "epoch": 0.35555555555555557, |
| "grad_norm": 1.9101064459839039, |
| "kl": 0.0102691650390625, |
| "learning_rate": 2e-07, |
| "loss": 0.04224415421485901, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.3391170799732208, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 880, |
| "train_speed(iter/s)": 0.028551 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 317.75, |
| "epoch": 0.3575757575757576, |
| "grad_norm": 1.7650856984522036, |
| "kl": 0.0097930908203125, |
| "learning_rate": 2e-07, |
| "loss": 0.031351178884506226, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.27555315792560575, |
| "rewards/MultiModalAccuracyORM": 0.3333333425223827, |
| "step": 885, |
| "train_speed(iter/s)": 0.028585 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.05, |
| "epoch": 0.3595959595959596, |
| "grad_norm": 2.4394117877960615, |
| "kl": 0.0123748779296875, |
| "learning_rate": 2e-07, |
| "loss": 0.01872892677783966, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.30489687621593475, |
| "rewards/MultiModalAccuracyORM": 0.22500000521540642, |
| "step": 890, |
| "train_speed(iter/s)": 0.028637 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 476.05, |
| "epoch": 0.3616161616161616, |
| "grad_norm": 2.3682785721081854, |
| "kl": 0.00737762451171875, |
| "learning_rate": 2e-07, |
| "loss": 0.02124558687210083, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.41817026138305663, |
| "rewards/MultiModalAccuracyORM": 0.3500000089406967, |
| "step": 895, |
| "train_speed(iter/s)": 0.028688 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 429.85, |
| "epoch": 0.36363636363636365, |
| "grad_norm": 1.3234500775547358, |
| "kl": 0.007550048828125, |
| "learning_rate": 2e-07, |
| "loss": 0.025475236773490905, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.2260383188724518, |
| "rewards/MultiModalAccuracyORM": 0.18333333656191825, |
| "step": 900, |
| "train_speed(iter/s)": 0.028712 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 439.75, |
| "epoch": 0.3656565656565657, |
| "grad_norm": 3.0802331121314785, |
| "kl": 0.0105621337890625, |
| "learning_rate": 2e-07, |
| "loss": 0.06260026693344116, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166666865348815, |
| "reward_std": 0.3003867596387863, |
| "rewards/MultiModalAccuracyORM": 0.24166666865348815, |
| "step": 905, |
| "train_speed(iter/s)": 0.028645 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.9, |
| "epoch": 0.36767676767676766, |
| "grad_norm": 3.596137864021678, |
| "kl": 0.01011199951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.007353886961936951, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4833333432674408, |
| "reward_std": 0.38523324131965636, |
| "rewards/MultiModalAccuracyORM": 0.4833333432674408, |
| "step": 910, |
| "train_speed(iter/s)": 0.028662 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 296.65, |
| "epoch": 0.3696969696969697, |
| "grad_norm": 1.4417889638729746, |
| "kl": 0.01177978515625, |
| "learning_rate": 2e-07, |
| "loss": -0.006625932455062866, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666783392429, |
| "reward_std": 0.37195890843868257, |
| "rewards/MultiModalAccuracyORM": 0.3416666783392429, |
| "step": 915, |
| "train_speed(iter/s)": 0.028677 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 431.05, |
| "epoch": 0.3717171717171717, |
| "grad_norm": 2.8875811253312333, |
| "kl": 0.01148529052734375, |
| "learning_rate": 2e-07, |
| "loss": -3.943443298339844e-05, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667237877847, |
| "reward_std": 0.3604352355003357, |
| "rewards/MultiModalAccuracyORM": 0.41666667237877847, |
| "step": 920, |
| "train_speed(iter/s)": 0.028643 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.3, |
| "epoch": 0.37373737373737376, |
| "grad_norm": 1.8636332228250176, |
| "kl": 0.0091461181640625, |
| "learning_rate": 2e-07, |
| "loss": 0.004881632328033447, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.3440760403871536, |
| "rewards/MultiModalAccuracyORM": 0.2833333402872086, |
| "step": 925, |
| "train_speed(iter/s)": 0.028644 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.4, |
| "epoch": 0.37575757575757573, |
| "grad_norm": 2.1407505535783242, |
| "kl": 0.00869598388671875, |
| "learning_rate": 2e-07, |
| "loss": 0.05731675624847412, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.41186849772930145, |
| "rewards/MultiModalAccuracyORM": 0.25833334103226663, |
| "step": 930, |
| "train_speed(iter/s)": 0.028644 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.9, |
| "epoch": 0.37777777777777777, |
| "grad_norm": 3.79021329286614, |
| "kl": 0.009942626953125, |
| "learning_rate": 2e-07, |
| "loss": 0.0477484941482544, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666716337204, |
| "reward_std": 0.2674737572669983, |
| "rewards/MultiModalAccuracyORM": 0.3166666716337204, |
| "step": 935, |
| "train_speed(iter/s)": 0.028648 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 283.45, |
| "epoch": 0.3797979797979798, |
| "grad_norm": 2.2451102482111724, |
| "kl": 0.012542724609375, |
| "learning_rate": 2e-07, |
| "loss": -1.335442066192627e-05, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000052154064, |
| "reward_std": 0.25591449439525604, |
| "rewards/MultiModalAccuracyORM": 0.3500000052154064, |
| "step": 940, |
| "train_speed(iter/s)": 0.028624 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.15, |
| "epoch": 0.38181818181818183, |
| "grad_norm": 1.4018775780145751, |
| "kl": 0.0094390869140625, |
| "learning_rate": 2e-07, |
| "loss": -0.003527042269706726, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.31088480055332185, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 945, |
| "train_speed(iter/s)": 0.028624 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.7, |
| "epoch": 0.3838383838383838, |
| "grad_norm": 3.8112599620979117, |
| "kl": 0.01011962890625, |
| "learning_rate": 2e-07, |
| "loss": 0.01941452920436859, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667610406876, |
| "reward_std": 0.34228681921958926, |
| "rewards/MultiModalAccuracyORM": 0.34166667610406876, |
| "step": 950, |
| "train_speed(iter/s)": 0.028616 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 260.35, |
| "epoch": 0.38585858585858585, |
| "grad_norm": 1.8716114512263384, |
| "kl": 0.0135040283203125, |
| "learning_rate": 2e-07, |
| "loss": 0.01583598256111145, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666701436043, |
| "reward_std": 0.26291108727455137, |
| "rewards/MultiModalAccuracyORM": 0.3916666701436043, |
| "step": 955, |
| "train_speed(iter/s)": 0.028656 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.0, |
| "epoch": 0.3878787878787879, |
| "grad_norm": 2.6882447296010508, |
| "kl": 0.0098663330078125, |
| "learning_rate": 2e-07, |
| "loss": 0.008884111046791076, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333410322666, |
| "reward_std": 0.3393357157707214, |
| "rewards/MultiModalAccuracyORM": 0.2833333410322666, |
| "step": 960, |
| "train_speed(iter/s)": 0.02864 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 288.3, |
| "epoch": 0.3898989898989899, |
| "grad_norm": 2.477942143166408, |
| "kl": 0.013421630859375, |
| "learning_rate": 2e-07, |
| "loss": 0.013846510648727417, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667088866236, |
| "reward_std": 0.3041278898715973, |
| "rewards/MultiModalAccuracyORM": 0.39166667088866236, |
| "step": 965, |
| "train_speed(iter/s)": 0.028659 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 288.35, |
| "epoch": 0.39191919191919194, |
| "grad_norm": 1.7487986972843892, |
| "kl": 0.008868408203125, |
| "learning_rate": 2e-07, |
| "loss": 0.041995507478713986, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000037252903, |
| "reward_std": 0.3277524411678314, |
| "rewards/MultiModalAccuracyORM": 0.3250000037252903, |
| "step": 970, |
| "train_speed(iter/s)": 0.028675 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 326.9, |
| "epoch": 0.3939393939393939, |
| "grad_norm": 1.040945452450775, |
| "kl": 0.00943603515625, |
| "learning_rate": 2e-07, |
| "loss": 0.004313239455223083, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333656191825, |
| "reward_std": 0.2722140818834305, |
| "rewards/MultiModalAccuracyORM": 0.18333333656191825, |
| "step": 975, |
| "train_speed(iter/s)": 0.028693 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 298.5, |
| "epoch": 0.39595959595959596, |
| "grad_norm": 1.987178230745996, |
| "kl": 0.0092681884765625, |
| "learning_rate": 2e-07, |
| "loss": 0.01756379157304764, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333358168602, |
| "reward_std": 0.3274982154369354, |
| "rewards/MultiModalAccuracyORM": 0.1833333358168602, |
| "step": 980, |
| "train_speed(iter/s)": 0.028714 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.4, |
| "epoch": 0.397979797979798, |
| "grad_norm": 1.9999919818314047, |
| "kl": 0.012908935546875, |
| "learning_rate": 2e-07, |
| "loss": 0.04084535539150238, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500001341104506, |
| "reward_std": 0.2752989321947098, |
| "rewards/MultiModalAccuracyORM": 0.32500001341104506, |
| "step": 985, |
| "train_speed(iter/s)": 0.028744 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 506.15, |
| "epoch": 0.4, |
| "grad_norm": 0.038170370656060805, |
| "kl": 0.010888671875, |
| "learning_rate": 2e-07, |
| "loss": 0.07128549218177796, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000819563867, |
| "reward_std": 0.30416645109653473, |
| "rewards/MultiModalAccuracyORM": 0.25000000819563867, |
| "step": 990, |
| "train_speed(iter/s)": 0.028708 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 433.45, |
| "epoch": 0.402020202020202, |
| "grad_norm": 2.632502419980814, |
| "kl": 0.0100616455078125, |
| "learning_rate": 2e-07, |
| "loss": 0.016613197326660157, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667610406875, |
| "reward_std": 0.37174026668071747, |
| "rewards/MultiModalAccuracyORM": 0.39166667610406875, |
| "step": 995, |
| "train_speed(iter/s)": 0.028687 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.07099216395354724, |
| "learning_rate": 2e-07, |
| "loss": 0.02232474982738495, |
| "memory(GiB)": 87.45, |
| "step": 1000, |
| "train_speed(iter/s)": 0.028672 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 346.9533413696289, |
| "eval_kl": 0.013145751953125, |
| "eval_loss": -0.00028896695584990084, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.281666671782732, |
| "eval_reward_std": 0.3010890519618988, |
| "eval_rewards/MultiModalAccuracyORM": 0.281666671782732, |
| "eval_runtime": 1406.863, |
| "eval_samples_per_second": 0.036, |
| "eval_steps_per_second": 0.004, |
| "step": 1000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.175, |
| "epoch": 0.40606060606060607, |
| "grad_norm": 1.905945440484278, |
| "kl": 0.009429931640625, |
| "learning_rate": 2e-07, |
| "loss": -0.0033631980419158935, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2708333387970924, |
| "reward_std": 0.24963780641555786, |
| "rewards/MultiModalAccuracyORM": 0.2708333387970924, |
| "step": 1005, |
| "train_speed(iter/s)": 0.027262 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 341.5, |
| "epoch": 0.4080808080808081, |
| "grad_norm": 1.6755020591769207, |
| "kl": 0.0134246826171875, |
| "learning_rate": 2e-07, |
| "loss": 0.05349223613739014, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000037252903, |
| "reward_std": 0.3494287371635437, |
| "rewards/MultiModalAccuracyORM": 0.2500000037252903, |
| "step": 1010, |
| "train_speed(iter/s)": 0.027279 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.25, |
| "epoch": 0.4101010101010101, |
| "grad_norm": 2.8913380726136872, |
| "kl": 0.0107147216796875, |
| "learning_rate": 2e-07, |
| "loss": -0.02667723298072815, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334103226663, |
| "reward_std": 0.4211569488048553, |
| "rewards/MultiModalAccuracyORM": 0.38333334103226663, |
| "step": 1015, |
| "train_speed(iter/s)": 0.027304 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 339.65, |
| "epoch": 0.4121212121212121, |
| "grad_norm": 4.180952848080379, |
| "kl": 0.0100433349609375, |
| "learning_rate": 2e-07, |
| "loss": 0.00991852581501007, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333447575569, |
| "reward_std": 0.3088736057281494, |
| "rewards/MultiModalAccuracyORM": 0.2083333447575569, |
| "step": 1020, |
| "train_speed(iter/s)": 0.027315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.55, |
| "epoch": 0.41414141414141414, |
| "grad_norm": 1.9667254904423306, |
| "kl": 0.0121246337890625, |
| "learning_rate": 2e-07, |
| "loss": 0.01899299621582031, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333507180214, |
| "reward_std": 0.3071291267871857, |
| "rewards/MultiModalAccuracyORM": 0.15833333507180214, |
| "step": 1025, |
| "train_speed(iter/s)": 0.027329 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 435.4, |
| "epoch": 0.4161616161616162, |
| "grad_norm": 1.7062594547415575, |
| "kl": 0.0100616455078125, |
| "learning_rate": 2e-07, |
| "loss": 0.004674983024597168, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500001043081283, |
| "reward_std": 0.3679134130477905, |
| "rewards/MultiModalAccuracyORM": 0.22500001043081283, |
| "step": 1030, |
| "train_speed(iter/s)": 0.027298 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 350.0, |
| "epoch": 0.41818181818181815, |
| "grad_norm": 72.23734764401382, |
| "kl": 0.011712646484375, |
| "learning_rate": 2e-07, |
| "loss": 0.05118045210838318, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666775941849, |
| "reward_std": 0.34735551476478577, |
| "rewards/MultiModalAccuracyORM": 0.2666666775941849, |
| "step": 1035, |
| "train_speed(iter/s)": 0.027303 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 311.15, |
| "epoch": 0.4202020202020202, |
| "grad_norm": 1.6715902563969363, |
| "kl": 0.0135772705078125, |
| "learning_rate": 2e-07, |
| "loss": 0.045872822403907776, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666716337204, |
| "reward_std": 0.287842845916748, |
| "rewards/MultiModalAccuracyORM": 0.2416666716337204, |
| "step": 1040, |
| "train_speed(iter/s)": 0.027298 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.15, |
| "epoch": 0.4222222222222222, |
| "grad_norm": 2.734745023688755, |
| "kl": 0.012158203125, |
| "learning_rate": 2e-07, |
| "loss": 0.05562522411346436, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.4314686059951782, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 1045, |
| "train_speed(iter/s)": 0.027328 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.3, |
| "epoch": 0.42424242424242425, |
| "grad_norm": 0.07598134741536419, |
| "kl": 0.009765625, |
| "learning_rate": 2e-07, |
| "loss": 0.008748695254325867, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.18326250910758973, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 1050, |
| "train_speed(iter/s)": 0.027308 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.2, |
| "epoch": 0.4262626262626263, |
| "grad_norm": 9.627726509942965, |
| "kl": 0.0136199951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.03634963035583496, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666768491268, |
| "reward_std": 0.36670139729976653, |
| "rewards/MultiModalAccuracyORM": 0.3166666768491268, |
| "step": 1055, |
| "train_speed(iter/s)": 0.027311 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 289.7, |
| "epoch": 0.42828282828282827, |
| "grad_norm": 1.2371668114044378, |
| "kl": 0.0134979248046875, |
| "learning_rate": 2e-07, |
| "loss": 0.04366698265075684, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333373069763, |
| "reward_std": 0.3498693466186523, |
| "rewards/MultiModalAccuracyORM": 0.2083333373069763, |
| "step": 1060, |
| "train_speed(iter/s)": 0.027334 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.05, |
| "epoch": 0.4303030303030303, |
| "grad_norm": 2.52858518092475, |
| "kl": 0.0135711669921875, |
| "learning_rate": 2e-07, |
| "loss": 0.065219247341156, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000111758709, |
| "reward_std": 0.37853889763355253, |
| "rewards/MultiModalAccuracyORM": 0.3000000111758709, |
| "step": 1065, |
| "train_speed(iter/s)": 0.027352 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.5, |
| "epoch": 0.43232323232323233, |
| "grad_norm": 2.3424705728855995, |
| "kl": 0.0116546630859375, |
| "learning_rate": 2e-07, |
| "loss": 0.03819225430488586, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.3227578908205032, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 1070, |
| "train_speed(iter/s)": 0.027305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.55, |
| "epoch": 0.43434343434343436, |
| "grad_norm": 2.798437729299758, |
| "kl": 0.014569091796875, |
| "learning_rate": 2e-07, |
| "loss": 0.004848736524581909, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000819563863, |
| "reward_std": 0.31416428089141846, |
| "rewards/MultiModalAccuracyORM": 0.27500000819563863, |
| "step": 1075, |
| "train_speed(iter/s)": 0.027334 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 300.8, |
| "epoch": 0.43636363636363634, |
| "grad_norm": 1.7741031757506147, |
| "kl": 0.0157135009765625, |
| "learning_rate": 2e-07, |
| "loss": 0.00888105109333992, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.312698033452034, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 1080, |
| "train_speed(iter/s)": 0.027339 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.6, |
| "epoch": 0.4383838383838384, |
| "grad_norm": 2.06880703867489, |
| "kl": 0.0158050537109375, |
| "learning_rate": 2e-07, |
| "loss": -0.05194641947746277, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.22603832483291625, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 1085, |
| "train_speed(iter/s)": 0.027329 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 223.4, |
| "epoch": 0.4404040404040404, |
| "grad_norm": 2.4630209071132656, |
| "kl": 0.015411376953125, |
| "learning_rate": 2e-07, |
| "loss": -0.018011474609375, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333507180213, |
| "reward_std": 0.3071291267871857, |
| "rewards/MultiModalAccuracyORM": 0.20833333507180213, |
| "step": 1090, |
| "train_speed(iter/s)": 0.027372 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 264.2, |
| "epoch": 0.44242424242424244, |
| "grad_norm": 2.265643619288025, |
| "kl": 0.01461181640625, |
| "learning_rate": 2e-07, |
| "loss": 0.04221695959568024, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666701436043, |
| "reward_std": 0.3329358011484146, |
| "rewards/MultiModalAccuracyORM": 0.2666666701436043, |
| "step": 1095, |
| "train_speed(iter/s)": 0.027407 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.95, |
| "epoch": 0.4444444444444444, |
| "grad_norm": 2.894324596003934, |
| "kl": 0.009808349609375, |
| "learning_rate": 2e-07, |
| "loss": 0.02248055934906006, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666828095913, |
| "reward_std": 0.44607712924480436, |
| "rewards/MultiModalAccuracyORM": 0.4166666828095913, |
| "step": 1100, |
| "train_speed(iter/s)": 0.027442 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 247.95, |
| "epoch": 0.44646464646464645, |
| "grad_norm": 0.9507289625656876, |
| "kl": 0.0140777587890625, |
| "learning_rate": 2e-07, |
| "loss": -0.0001364484429359436, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.46666667237877846, |
| "reward_std": 0.24261614382267, |
| "rewards/MultiModalAccuracyORM": 0.46666667237877846, |
| "step": 1105, |
| "train_speed(iter/s)": 0.027471 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 238.75, |
| "epoch": 0.4484848484848485, |
| "grad_norm": 4.493560880958603, |
| "kl": 0.01422119140625, |
| "learning_rate": 2e-07, |
| "loss": 0.00024300813674926758, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000149011612, |
| "reward_std": 0.345323646068573, |
| "rewards/MultiModalAccuracyORM": 0.4500000149011612, |
| "step": 1110, |
| "train_speed(iter/s)": 0.027312 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.15, |
| "epoch": 0.4505050505050505, |
| "grad_norm": 1.866809698039603, |
| "kl": 0.0131317138671875, |
| "learning_rate": 2e-07, |
| "loss": -0.007444334030151367, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000298023224, |
| "reward_std": 0.3281930506229401, |
| "rewards/MultiModalAccuracyORM": 0.20000000298023224, |
| "step": 1115, |
| "train_speed(iter/s)": 0.027296 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.45, |
| "epoch": 0.45252525252525255, |
| "grad_norm": 0.04083454065583723, |
| "kl": 0.0086578369140625, |
| "learning_rate": 2e-07, |
| "loss": 0.009036242961883545, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000447034835, |
| "reward_std": 0.24710224866867064, |
| "rewards/MultiModalAccuracyORM": 0.22500000447034835, |
| "step": 1120, |
| "train_speed(iter/s)": 0.027256 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 299.75, |
| "epoch": 0.45454545454545453, |
| "grad_norm": 2.1257862237671588, |
| "kl": 0.01603851318359375, |
| "learning_rate": 2e-07, |
| "loss": -0.014222325384616851, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333432674408, |
| "reward_std": 0.4078585982322693, |
| "rewards/MultiModalAccuracyORM": 0.4333333432674408, |
| "step": 1125, |
| "train_speed(iter/s)": 0.027299 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.15, |
| "epoch": 0.45656565656565656, |
| "grad_norm": 48.10712707725128, |
| "kl": 0.0124542236328125, |
| "learning_rate": 2e-07, |
| "loss": 0.009453803300857544, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833333879709246, |
| "reward_std": 0.32858102321624755, |
| "rewards/MultiModalAccuracyORM": 0.30833333879709246, |
| "step": 1130, |
| "train_speed(iter/s)": 0.027339 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 434.6, |
| "epoch": 0.4585858585858586, |
| "grad_norm": 0.8869001794016839, |
| "kl": 0.01041259765625, |
| "learning_rate": 2e-07, |
| "loss": -0.002349555492401123, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333730697633, |
| "reward_std": 0.29003951847553255, |
| "rewards/MultiModalAccuracyORM": 0.13333333730697633, |
| "step": 1135, |
| "train_speed(iter/s)": 0.027364 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.0, |
| "epoch": 0.46060606060606063, |
| "grad_norm": 2.2315283680448346, |
| "kl": 0.0132476806640625, |
| "learning_rate": 2e-07, |
| "loss": -0.010060985386371613, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000745058059, |
| "reward_std": 0.3043610692024231, |
| "rewards/MultiModalAccuracyORM": 0.17500000745058059, |
| "step": 1140, |
| "train_speed(iter/s)": 0.027393 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 449.2, |
| "epoch": 0.4626262626262626, |
| "grad_norm": 0.04850876090724914, |
| "kl": 0.0081451416015625, |
| "learning_rate": 2e-07, |
| "loss": -0.022587394714355467, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333656191826, |
| "reward_std": 0.20343697369098662, |
| "rewards/MultiModalAccuracyORM": 0.10833333656191826, |
| "step": 1145, |
| "train_speed(iter/s)": 0.027421 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.05, |
| "epoch": 0.46464646464646464, |
| "grad_norm": 2.2096178690715, |
| "kl": 0.0104400634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.01734369993209839, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000074505806, |
| "reward_std": 0.33700530230998993, |
| "rewards/MultiModalAccuracyORM": 0.3250000074505806, |
| "step": 1150, |
| "train_speed(iter/s)": 0.027419 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.65, |
| "epoch": 0.4666666666666667, |
| "grad_norm": 1.3995623416059861, |
| "kl": 0.020782470703125, |
| "learning_rate": 2e-07, |
| "loss": 0.004217700660228729, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.20594746768474578, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 1155, |
| "train_speed(iter/s)": 0.027419 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 229.45, |
| "epoch": 0.4686868686868687, |
| "grad_norm": 7.604841869136694, |
| "kl": 0.017425537109375, |
| "learning_rate": 2e-07, |
| "loss": 0.04910666048526764, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000029802322, |
| "reward_std": 0.3408561676740646, |
| "rewards/MultiModalAccuracyORM": 0.3000000029802322, |
| "step": 1160, |
| "train_speed(iter/s)": 0.02739 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.1, |
| "epoch": 0.4707070707070707, |
| "grad_norm": 1.7338556861412973, |
| "kl": 0.009881591796875, |
| "learning_rate": 2e-07, |
| "loss": -0.02307046055793762, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000149011613, |
| "reward_std": 0.18081162869930267, |
| "rewards/MultiModalAccuracyORM": 0.17500000149011613, |
| "step": 1165, |
| "train_speed(iter/s)": 0.027388 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.15, |
| "epoch": 0.4727272727272727, |
| "grad_norm": 1.2587552234540058, |
| "kl": 0.0092010498046875, |
| "learning_rate": 2e-07, |
| "loss": -0.05895323753356933, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666668429970743, |
| "reward_std": 0.40890581607818605, |
| "rewards/MultiModalAccuracyORM": 0.41666668429970743, |
| "step": 1170, |
| "train_speed(iter/s)": 0.027373 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.2, |
| "epoch": 0.47474747474747475, |
| "grad_norm": 0.06683334066144007, |
| "kl": 0.01002349853515625, |
| "learning_rate": 2e-07, |
| "loss": 0.02935360074043274, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666716337204, |
| "reward_std": 0.27523933053016664, |
| "rewards/MultiModalAccuracyORM": 0.2416666716337204, |
| "step": 1175, |
| "train_speed(iter/s)": 0.027312 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 443.15, |
| "epoch": 0.4767676767676768, |
| "grad_norm": 27.070556493942583, |
| "kl": 0.00930938720703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0851466953754425, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000223517416, |
| "reward_std": 0.3342405825853348, |
| "rewards/MultiModalAccuracyORM": 0.25000000223517416, |
| "step": 1180, |
| "train_speed(iter/s)": 0.027331 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.55, |
| "epoch": 0.47878787878787876, |
| "grad_norm": 1.5534177345271625, |
| "kl": 0.0102996826171875, |
| "learning_rate": 2e-07, |
| "loss": 0.028819066286087037, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.266666679084301, |
| "reward_std": 0.3129431068897247, |
| "rewards/MultiModalAccuracyORM": 0.266666679084301, |
| "step": 1185, |
| "train_speed(iter/s)": 0.027335 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 327.65, |
| "epoch": 0.4808080808080808, |
| "grad_norm": 2.8838868478156816, |
| "kl": 0.02685546875, |
| "learning_rate": 2e-07, |
| "loss": 0.006991004943847657, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000447034836, |
| "reward_std": 0.2323400765657425, |
| "rewards/MultiModalAccuracyORM": 0.17500000447034836, |
| "step": 1190, |
| "train_speed(iter/s)": 0.027082 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 500.2, |
| "epoch": 0.48282828282828283, |
| "grad_norm": 2.6317167816627993, |
| "kl": 0.014031982421875, |
| "learning_rate": 2e-07, |
| "loss": -0.003238886594772339, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.20000000596046447, |
| "reward_std": 0.30388820767402647, |
| "rewards/MultiModalAccuracyORM": 0.20000000596046447, |
| "step": 1195, |
| "train_speed(iter/s)": 0.026955 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 259.35, |
| "epoch": 0.48484848484848486, |
| "grad_norm": 53.95756362621299, |
| "kl": 0.0124114990234375, |
| "learning_rate": 2e-07, |
| "loss": -0.00888831913471222, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000029802322, |
| "reward_std": 0.29782613217830656, |
| "rewards/MultiModalAccuracyORM": 0.3000000029802322, |
| "step": 1200, |
| "train_speed(iter/s)": 0.026995 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.0, |
| "epoch": 0.4868686868686869, |
| "grad_norm": 1.8840812265683782, |
| "kl": 0.016448974609375, |
| "learning_rate": 2e-07, |
| "loss": 0.024408812820911407, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667610406876, |
| "reward_std": 0.4253006011247635, |
| "rewards/MultiModalAccuracyORM": 0.34166667610406876, |
| "step": 1205, |
| "train_speed(iter/s)": 0.02702 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 263.2, |
| "epoch": 0.4888888888888889, |
| "grad_norm": 2.267475237086073, |
| "kl": 0.01165771484375, |
| "learning_rate": 2e-07, |
| "loss": -0.02959960699081421, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666813194752, |
| "reward_std": 0.3111630380153656, |
| "rewards/MultiModalAccuracyORM": 0.4416666813194752, |
| "step": 1210, |
| "train_speed(iter/s)": 0.027058 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 341.55, |
| "epoch": 0.4909090909090909, |
| "grad_norm": 1.53249738300366, |
| "kl": 0.01207275390625, |
| "learning_rate": 2e-07, |
| "loss": 0.01664416640996933, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.39155901670455934, |
| "rewards/MultiModalAccuracyORM": 0.3500000089406967, |
| "step": 1215, |
| "train_speed(iter/s)": 0.027075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.35, |
| "epoch": 0.49292929292929294, |
| "grad_norm": 2.838473944184638, |
| "kl": 0.0138153076171875, |
| "learning_rate": 2e-07, |
| "loss": 0.011857110261917114, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666667237877847, |
| "reward_std": 0.32422170639038084, |
| "rewards/MultiModalAccuracyORM": 0.16666667237877847, |
| "step": 1220, |
| "train_speed(iter/s)": 0.027075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.2, |
| "epoch": 0.494949494949495, |
| "grad_norm": 2.239419757076915, |
| "kl": 0.0130462646484375, |
| "learning_rate": 2e-07, |
| "loss": 0.03971967101097107, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667088866234, |
| "reward_std": 0.23224489092826844, |
| "rewards/MultiModalAccuracyORM": 0.36666667088866234, |
| "step": 1225, |
| "train_speed(iter/s)": 0.027083 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 244.8, |
| "epoch": 0.49696969696969695, |
| "grad_norm": 2.1763944900135637, |
| "kl": 0.0342437744140625, |
| "learning_rate": 2e-07, |
| "loss": -0.010297659039497375, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333410322666, |
| "reward_std": 0.3408351272344589, |
| "rewards/MultiModalAccuracyORM": 0.3583333410322666, |
| "step": 1230, |
| "train_speed(iter/s)": 0.027096 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 293.45, |
| "epoch": 0.498989898989899, |
| "grad_norm": 6.002103596814289, |
| "kl": 0.020233154296875, |
| "learning_rate": 2e-07, |
| "loss": 0.08779069185256957, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000670552255, |
| "reward_std": 0.35311026573181153, |
| "rewards/MultiModalAccuracyORM": 0.40000000670552255, |
| "step": 1235, |
| "train_speed(iter/s)": 0.027106 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 468.45, |
| "epoch": 0.501010101010101, |
| "grad_norm": 1.7067044601090864, |
| "kl": 0.00786285400390625, |
| "learning_rate": 2e-07, |
| "loss": 0.05108952522277832, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.3033378630876541, |
| "rewards/MultiModalAccuracyORM": 0.3500000089406967, |
| "step": 1240, |
| "train_speed(iter/s)": 0.027091 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 397.4, |
| "epoch": 0.503030303030303, |
| "grad_norm": 0.8938521798548926, |
| "kl": 0.009466552734375, |
| "learning_rate": 2e-07, |
| "loss": -0.01685338616371155, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333656191826, |
| "reward_std": 0.2292436480522156, |
| "rewards/MultiModalAccuracyORM": 0.13333333656191826, |
| "step": 1245, |
| "train_speed(iter/s)": 0.02707 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 3.702370322108623, |
| "learning_rate": 2e-07, |
| "loss": 0.036279809474945066, |
| "memory(GiB)": 87.45, |
| "step": 1250, |
| "train_speed(iter/s)": 0.027086 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 321.4716763305664, |
| "eval_kl": 0.015718994140625, |
| "eval_loss": 0.013520264066755772, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.3033333399891853, |
| "eval_reward_std": 0.3383384072780609, |
| "eval_rewards/MultiModalAccuracyORM": 0.3033333399891853, |
| "eval_runtime": 765.5729, |
| "eval_samples_per_second": 0.065, |
| "eval_steps_per_second": 0.007, |
| "step": 1250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 349.475, |
| "epoch": 0.5070707070707071, |
| "grad_norm": 1.4811421198816048, |
| "kl": 0.01293487548828125, |
| "learning_rate": 2e-07, |
| "loss": 0.03056705594062805, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833334140479567, |
| "reward_std": 0.38048321902751925, |
| "rewards/MultiModalAccuracyORM": 0.35833334140479567, |
| "step": 1255, |
| "train_speed(iter/s)": 0.026435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 209.15, |
| "epoch": 0.509090909090909, |
| "grad_norm": 2.0552411044504764, |
| "kl": 0.0252899169921875, |
| "learning_rate": 2e-07, |
| "loss": 0.028329643607139587, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334177732465, |
| "reward_std": 0.281466943025589, |
| "rewards/MultiModalAccuracyORM": 0.33333334177732465, |
| "step": 1260, |
| "train_speed(iter/s)": 0.026464 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.4, |
| "epoch": 0.5111111111111111, |
| "grad_norm": 2.615766039038286, |
| "kl": 0.01002197265625, |
| "learning_rate": 2e-07, |
| "loss": 0.002955615520477295, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.2292436480522156, |
| "rewards/MultiModalAccuracyORM": 0.25000000521540644, |
| "step": 1265, |
| "train_speed(iter/s)": 0.02646 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.85, |
| "epoch": 0.5131313131313131, |
| "grad_norm": 1.9893529067484352, |
| "kl": 0.011163330078125, |
| "learning_rate": 2e-07, |
| "loss": 0.018701747059822083, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.300000012665987, |
| "reward_std": 0.3127244770526886, |
| "rewards/MultiModalAccuracyORM": 0.300000012665987, |
| "step": 1270, |
| "train_speed(iter/s)": 0.026466 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 253.65, |
| "epoch": 0.5151515151515151, |
| "grad_norm": 1.6843559930041148, |
| "kl": 0.0115509033203125, |
| "learning_rate": 2e-07, |
| "loss": 0.012320590019226075, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5000000111758709, |
| "reward_std": 0.345323646068573, |
| "rewards/MultiModalAccuracyORM": 0.5000000111758709, |
| "step": 1275, |
| "train_speed(iter/s)": 0.026464 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.25, |
| "epoch": 0.5171717171717172, |
| "grad_norm": 3.0894548096911407, |
| "kl": 0.010302734375, |
| "learning_rate": 2e-07, |
| "loss": -0.02475722283124924, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000074505806, |
| "reward_std": 0.21999078691005708, |
| "rewards/MultiModalAccuracyORM": 0.2500000074505806, |
| "step": 1280, |
| "train_speed(iter/s)": 0.026449 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.45, |
| "epoch": 0.5191919191919192, |
| "grad_norm": 0.056162470903676515, |
| "kl": 0.010772705078125, |
| "learning_rate": 2e-07, |
| "loss": -0.0004087850451469421, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.23105688095092775, |
| "rewards/MultiModalAccuracyORM": 0.21666667386889457, |
| "step": 1285, |
| "train_speed(iter/s)": 0.026422 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.55, |
| "epoch": 0.5212121212121212, |
| "grad_norm": 1.7176303706462466, |
| "kl": 0.011578369140625, |
| "learning_rate": 2e-07, |
| "loss": 0.023639577627182006, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666693985462, |
| "reward_std": 0.28077210783958434, |
| "rewards/MultiModalAccuracyORM": 0.2666666693985462, |
| "step": 1290, |
| "train_speed(iter/s)": 0.026422 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.65, |
| "epoch": 0.5232323232323233, |
| "grad_norm": 1.244445708488179, |
| "kl": 0.0103790283203125, |
| "learning_rate": 2e-07, |
| "loss": -0.017145507037639618, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333395421505, |
| "reward_std": 0.4086130350828171, |
| "rewards/MultiModalAccuracyORM": 0.3833333395421505, |
| "step": 1295, |
| "train_speed(iter/s)": 0.026442 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.2, |
| "epoch": 0.5252525252525253, |
| "grad_norm": 1.7914388567184454, |
| "kl": 0.0092559814453125, |
| "learning_rate": 2e-07, |
| "loss": 0.054825717210769655, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333417773247, |
| "reward_std": 0.3536572724580765, |
| "rewards/MultiModalAccuracyORM": 0.3083333417773247, |
| "step": 1300, |
| "train_speed(iter/s)": 0.026415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 251.9, |
| "epoch": 0.5272727272727272, |
| "grad_norm": 2.6174114359405976, |
| "kl": 0.010308837890625, |
| "learning_rate": 2e-07, |
| "loss": -0.019986753165721894, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000074505806, |
| "reward_std": 0.3099655658006668, |
| "rewards/MultiModalAccuracyORM": 0.4500000074505806, |
| "step": 1305, |
| "train_speed(iter/s)": 0.026387 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 264.9, |
| "epoch": 0.5292929292929293, |
| "grad_norm": 32.625329420627345, |
| "kl": 0.00882568359375, |
| "learning_rate": 2e-07, |
| "loss": 0.008027985692024231, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667312383654, |
| "reward_std": 0.40485736131668093, |
| "rewards/MultiModalAccuracyORM": 0.41666667312383654, |
| "step": 1310, |
| "train_speed(iter/s)": 0.026366 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.3, |
| "epoch": 0.5313131313131313, |
| "grad_norm": 1.6706902692989012, |
| "kl": 0.0086761474609375, |
| "learning_rate": 2e-07, |
| "loss": 0.028931498527526855, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334624767303, |
| "reward_std": 0.3558539390563965, |
| "rewards/MultiModalAccuracyORM": 0.28333334624767303, |
| "step": 1315, |
| "train_speed(iter/s)": 0.026338 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 274.25, |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.8800912459209826, |
| "kl": 0.0249176025390625, |
| "learning_rate": 2e-07, |
| "loss": 0.048329290747642514, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 1320, |
| "train_speed(iter/s)": 0.026308 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.3, |
| "epoch": 0.5353535353535354, |
| "grad_norm": 3.1086990293234904, |
| "kl": 0.01292724609375, |
| "learning_rate": 2e-07, |
| "loss": 0.006182897090911865, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667312383653, |
| "reward_std": 0.3867922484874725, |
| "rewards/MultiModalAccuracyORM": 0.34166667312383653, |
| "step": 1325, |
| "train_speed(iter/s)": 0.026274 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 404.3, |
| "epoch": 0.5373737373737374, |
| "grad_norm": 0.08070215671871471, |
| "kl": 0.0099578857421875, |
| "learning_rate": 2e-07, |
| "loss": 0.062343114614486696, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000059604645, |
| "reward_std": 0.22625695466995238, |
| "rewards/MultiModalAccuracyORM": 0.3000000059604645, |
| "step": 1330, |
| "train_speed(iter/s)": 0.026241 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 360.75, |
| "epoch": 0.5393939393939394, |
| "grad_norm": 3.4146119265895893, |
| "kl": 0.0290008544921875, |
| "learning_rate": 2e-07, |
| "loss": -0.02337663769721985, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000819563863, |
| "reward_std": 0.31852359175682066, |
| "rewards/MultiModalAccuracyORM": 0.40000000819563863, |
| "step": 1335, |
| "train_speed(iter/s)": 0.026231 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 300.85, |
| "epoch": 0.5414141414141415, |
| "grad_norm": 1.014030648475331, |
| "kl": 0.0152801513671875, |
| "learning_rate": 2e-07, |
| "loss": 0.03424631953239441, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.22807018756866454, |
| "rewards/MultiModalAccuracyORM": 0.3333333425223827, |
| "step": 1340, |
| "train_speed(iter/s)": 0.026218 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.5, |
| "epoch": 0.5434343434343434, |
| "grad_norm": 2.579076344272663, |
| "kl": 0.0294189453125, |
| "learning_rate": 2e-07, |
| "loss": -0.004431784152984619, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.425000012665987, |
| "reward_std": 0.3433456152677536, |
| "rewards/MultiModalAccuracyORM": 0.425000012665987, |
| "step": 1345, |
| "train_speed(iter/s)": 0.026212 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.2, |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.09604007460689165, |
| "kl": 0.0132415771484375, |
| "learning_rate": 2e-07, |
| "loss": 0.011541323363780975, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333730697632, |
| "reward_std": 0.24961273670196532, |
| "rewards/MultiModalAccuracyORM": 0.15833333730697632, |
| "step": 1350, |
| "train_speed(iter/s)": 0.026199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.2, |
| "epoch": 0.5474747474747474, |
| "grad_norm": 2.8630066616840306, |
| "kl": 0.0131500244140625, |
| "learning_rate": 2e-07, |
| "loss": 0.0038095355033874513, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333484828472, |
| "reward_std": 0.371958914399147, |
| "rewards/MultiModalAccuracyORM": 0.4083333484828472, |
| "step": 1355, |
| "train_speed(iter/s)": 0.026195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.35, |
| "epoch": 0.5494949494949495, |
| "grad_norm": 2.8462264230542202, |
| "kl": 0.0113922119140625, |
| "learning_rate": 2e-07, |
| "loss": -0.013850301504135132, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333879709244, |
| "reward_std": 0.23857065439224243, |
| "rewards/MultiModalAccuracyORM": 0.13333333879709244, |
| "step": 1360, |
| "train_speed(iter/s)": 0.026178 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.8, |
| "epoch": 0.5515151515151515, |
| "grad_norm": 1.9037157526983224, |
| "kl": 0.0115966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.061475354433059695, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334252238274, |
| "reward_std": 0.37644500732421876, |
| "rewards/MultiModalAccuracyORM": 0.28333334252238274, |
| "step": 1365, |
| "train_speed(iter/s)": 0.026169 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.25, |
| "epoch": 0.5535353535353535, |
| "grad_norm": 1.5230914677267515, |
| "kl": 0.012347412109375, |
| "learning_rate": 2e-07, |
| "loss": 0.02505878210067749, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.26496326327323916, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 1370, |
| "train_speed(iter/s)": 0.026162 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 304.7, |
| "epoch": 0.5555555555555556, |
| "grad_norm": 1.9879722073308892, |
| "kl": 0.0135223388671875, |
| "learning_rate": 2e-07, |
| "loss": 0.010433109104633331, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.18332211077213287, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 1375, |
| "train_speed(iter/s)": 0.026157 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 319.9, |
| "epoch": 0.5575757575757576, |
| "grad_norm": 2.649637312336083, |
| "kl": 0.012469482421875, |
| "learning_rate": 2e-07, |
| "loss": 0.009650683403015137, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.3890485167503357, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 1380, |
| "train_speed(iter/s)": 0.02615 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 355.05, |
| "epoch": 0.5595959595959596, |
| "grad_norm": 0.05006149717815439, |
| "kl": 0.016656494140625, |
| "learning_rate": 2e-07, |
| "loss": -0.007993972301483155, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667386889455, |
| "reward_std": 0.3541334718465805, |
| "rewards/MultiModalAccuracyORM": 0.29166667386889455, |
| "step": 1385, |
| "train_speed(iter/s)": 0.026129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 155.65, |
| "epoch": 0.5616161616161616, |
| "grad_norm": 0.08079407011077554, |
| "kl": 0.01981201171875, |
| "learning_rate": 2e-07, |
| "loss": 0.03422499895095825, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.3597048044204712, |
| "rewards/MultiModalAccuracyORM": 0.25833334103226663, |
| "step": 1390, |
| "train_speed(iter/s)": 0.026124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.3, |
| "epoch": 0.5636363636363636, |
| "grad_norm": 2.595093461800728, |
| "kl": 0.016748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.0661674439907074, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667610406877, |
| "reward_std": 0.41412476599216463, |
| "rewards/MultiModalAccuracyORM": 0.41666667610406877, |
| "step": 1395, |
| "train_speed(iter/s)": 0.02611 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 312.3, |
| "epoch": 0.5656565656565656, |
| "grad_norm": 1.8524460034780388, |
| "kl": 0.0219970703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0748141050338745, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333410322666, |
| "reward_std": 0.3222051203250885, |
| "rewards/MultiModalAccuracyORM": 0.2833333410322666, |
| "step": 1400, |
| "train_speed(iter/s)": 0.026104 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 284.7, |
| "epoch": 0.5676767676767677, |
| "grad_norm": 1.8645433263018287, |
| "kl": 0.020556640625, |
| "learning_rate": 2e-07, |
| "loss": -0.019703832268714905, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.23230449855327606, |
| "rewards/MultiModalAccuracyORM": 0.2833333402872086, |
| "step": 1405, |
| "train_speed(iter/s)": 0.026096 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 301.15, |
| "epoch": 0.5696969696969697, |
| "grad_norm": 2.007508731899272, |
| "kl": 0.014324951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.026613450050354003, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000819563863, |
| "reward_std": 0.26928699016571045, |
| "rewards/MultiModalAccuracyORM": 0.40000000819563863, |
| "step": 1410, |
| "train_speed(iter/s)": 0.026082 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.6, |
| "epoch": 0.5717171717171717, |
| "grad_norm": 1.3049808616717113, |
| "kl": 0.0161651611328125, |
| "learning_rate": 2e-07, |
| "loss": -0.019157709181308748, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.3352662205696106, |
| "rewards/MultiModalAccuracyORM": 0.25833334103226663, |
| "step": 1415, |
| "train_speed(iter/s)": 0.026066 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.4, |
| "epoch": 0.5737373737373738, |
| "grad_norm": 1.7990652267186868, |
| "kl": 0.021240234375, |
| "learning_rate": 2e-07, |
| "loss": 0.043132427334785464, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000298023224, |
| "reward_std": 0.2159808874130249, |
| "rewards/MultiModalAccuracyORM": 0.17500000298023224, |
| "step": 1420, |
| "train_speed(iter/s)": 0.026059 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 288.85, |
| "epoch": 0.5757575757575758, |
| "grad_norm": 1.3873829792776142, |
| "kl": 0.017431640625, |
| "learning_rate": 2e-07, |
| "loss": 0.010021258890628815, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45833334475755694, |
| "reward_std": 0.2770525634288788, |
| "rewards/MultiModalAccuracyORM": 0.45833334475755694, |
| "step": 1425, |
| "train_speed(iter/s)": 0.026059 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 296.05, |
| "epoch": 0.5777777777777777, |
| "grad_norm": 1.6565432442769377, |
| "kl": 0.0139190673828125, |
| "learning_rate": 2e-07, |
| "loss": 0.016829773783683777, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.40086200535297395, |
| "rewards/MultiModalAccuracyORM": 0.29166667312383654, |
| "step": 1430, |
| "train_speed(iter/s)": 0.026063 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.7, |
| "epoch": 0.5797979797979798, |
| "grad_norm": 1.2410328295318487, |
| "kl": 0.015863037109375, |
| "learning_rate": 2e-07, |
| "loss": -0.04091094434261322, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666731238365, |
| "reward_std": 0.3603756338357925, |
| "rewards/MultiModalAccuracyORM": 0.3166666731238365, |
| "step": 1435, |
| "train_speed(iter/s)": 0.026053 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 305.7, |
| "epoch": 0.5818181818181818, |
| "grad_norm": 2.659138324217993, |
| "kl": 0.01724853515625, |
| "learning_rate": 2e-07, |
| "loss": 0.08770001530647278, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333425223827, |
| "reward_std": 0.4456100821495056, |
| "rewards/MultiModalAccuracyORM": 0.3333333425223827, |
| "step": 1440, |
| "train_speed(iter/s)": 0.026045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.15, |
| "epoch": 0.5838383838383838, |
| "grad_norm": 2.6855533659279462, |
| "kl": 0.015350341796875, |
| "learning_rate": 2e-07, |
| "loss": -0.03101794719696045, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.2644129186868668, |
| "rewards/MultiModalAccuracyORM": 0.1666666716337204, |
| "step": 1445, |
| "train_speed(iter/s)": 0.026039 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.85, |
| "epoch": 0.5858585858585859, |
| "grad_norm": 0.8787033948980154, |
| "kl": 0.018048095703125, |
| "learning_rate": 2e-07, |
| "loss": 0.021743962168693544, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000014901161, |
| "reward_std": 0.2496483266353607, |
| "rewards/MultiModalAccuracyORM": 0.1500000014901161, |
| "step": 1450, |
| "train_speed(iter/s)": 0.026027 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 329.65, |
| "epoch": 0.5878787878787879, |
| "grad_norm": 2.6089377973235917, |
| "kl": 0.0154541015625, |
| "learning_rate": 2e-07, |
| "loss": -0.0126606285572052, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333730697633, |
| "reward_std": 0.287842845916748, |
| "rewards/MultiModalAccuracyORM": 0.25833333730697633, |
| "step": 1455, |
| "train_speed(iter/s)": 0.026012 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.55, |
| "epoch": 0.5898989898989899, |
| "grad_norm": 3.1599228273908895, |
| "kl": 0.017535400390625, |
| "learning_rate": 2e-07, |
| "loss": 0.03227808475494385, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000089406967, |
| "reward_std": 0.2754935622215271, |
| "rewards/MultiModalAccuracyORM": 0.3500000089406967, |
| "step": 1460, |
| "train_speed(iter/s)": 0.025992 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 247.35, |
| "epoch": 0.591919191919192, |
| "grad_norm": 3.772779516485284, |
| "kl": 0.016162109375, |
| "learning_rate": 2e-07, |
| "loss": -0.006427288055419922, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.3214506834745407, |
| "rewards/MultiModalAccuracyORM": 0.21666667088866234, |
| "step": 1465, |
| "train_speed(iter/s)": 0.02598 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 300.7, |
| "epoch": 0.593939393939394, |
| "grad_norm": 1.9048234622524929, |
| "kl": 0.019964599609375, |
| "learning_rate": 2e-07, |
| "loss": 0.02089463174343109, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334550261495, |
| "reward_std": 0.39707074165344236, |
| "rewards/MultiModalAccuracyORM": 0.33333334550261495, |
| "step": 1470, |
| "train_speed(iter/s)": 0.025963 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 372.2, |
| "epoch": 0.5959595959595959, |
| "grad_norm": 1.7167051608215667, |
| "kl": 0.0126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.0002398371696472168, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.3485645651817322, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 1475, |
| "train_speed(iter/s)": 0.025929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.9, |
| "epoch": 0.597979797979798, |
| "grad_norm": 2.018355689891589, |
| "kl": 0.014324951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.025476664304733276, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5250000104308128, |
| "reward_std": 0.3463323086500168, |
| "rewards/MultiModalAccuracyORM": 0.5250000104308128, |
| "step": 1480, |
| "train_speed(iter/s)": 0.025899 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.35, |
| "epoch": 0.6, |
| "grad_norm": 1.9564498539046626, |
| "kl": 0.013104248046875, |
| "learning_rate": 2e-07, |
| "loss": -0.0017219483852386475, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666753590107, |
| "reward_std": 0.3392761141061783, |
| "rewards/MultiModalAccuracyORM": 0.2666666753590107, |
| "step": 1485, |
| "train_speed(iter/s)": 0.025884 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.55, |
| "epoch": 0.602020202020202, |
| "grad_norm": 3.3586873596373836, |
| "kl": 0.0190948486328125, |
| "learning_rate": 2e-07, |
| "loss": -0.015026980638504028, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334550261495, |
| "reward_std": 0.43529842495918275, |
| "rewards/MultiModalAccuracyORM": 0.33333334550261495, |
| "step": 1490, |
| "train_speed(iter/s)": 0.02585 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.1, |
| "epoch": 0.604040404040404, |
| "grad_norm": 1.5566031738878978, |
| "kl": 0.0152313232421875, |
| "learning_rate": 2e-07, |
| "loss": 0.05221402645111084, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4666666716337204, |
| "reward_std": 0.3853524446487427, |
| "rewards/MultiModalAccuracyORM": 0.4666666716337204, |
| "step": 1495, |
| "train_speed(iter/s)": 0.025826 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 1.092725055214899, |
| "learning_rate": 2e-07, |
| "loss": 0.044440290331840514, |
| "memory(GiB)": 87.45, |
| "step": 1500, |
| "train_speed(iter/s)": 0.025794 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 332.07667766571046, |
| "eval_kl": 0.03210205078125, |
| "eval_loss": 0.03433879837393761, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.32333334147930143, |
| "eval_reward_std": 0.34949765503406527, |
| "eval_rewards/MultiModalAccuracyORM": 0.32333334147930143, |
| "eval_runtime": 946.9078, |
| "eval_samples_per_second": 0.053, |
| "eval_steps_per_second": 0.005, |
| "step": 1500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.3, |
| "epoch": 0.6080808080808081, |
| "grad_norm": 1.594406200527781, |
| "kl": 0.01402130126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.011821150779724121, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000447034836, |
| "reward_std": 0.29021300822496415, |
| "rewards/MultiModalAccuracyORM": 0.30000000447034836, |
| "step": 1505, |
| "train_speed(iter/s)": 0.02519 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.35, |
| "epoch": 0.6101010101010101, |
| "grad_norm": 1.872354266566466, |
| "kl": 0.01295166015625, |
| "learning_rate": 2e-07, |
| "loss": 0.040472963452339174, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667237877845, |
| "reward_std": 0.24481281042098998, |
| "rewards/MultiModalAccuracyORM": 0.39166667237877845, |
| "step": 1510, |
| "train_speed(iter/s)": 0.025138 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.25, |
| "epoch": 0.6121212121212121, |
| "grad_norm": 2.2298458448624032, |
| "kl": 0.017498779296875, |
| "learning_rate": 2e-07, |
| "loss": -0.003679761290550232, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666753590107, |
| "reward_std": 0.33752005696296694, |
| "rewards/MultiModalAccuracyORM": 0.2666666753590107, |
| "step": 1515, |
| "train_speed(iter/s)": 0.02512 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 495.3, |
| "epoch": 0.6141414141414141, |
| "grad_norm": 2.1057358539094637, |
| "kl": 0.013360595703125, |
| "learning_rate": 2e-07, |
| "loss": -0.040804427862167356, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666667088866234, |
| "reward_std": 0.22625695466995238, |
| "rewards/MultiModalAccuracyORM": 0.11666667088866234, |
| "step": 1520, |
| "train_speed(iter/s)": 0.025038 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 355.45, |
| "epoch": 0.6161616161616161, |
| "grad_norm": 1.7271901034384924, |
| "kl": 0.01778564453125, |
| "learning_rate": 2e-07, |
| "loss": 0.04612007737159729, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667759418485, |
| "reward_std": 0.385197651386261, |
| "rewards/MultiModalAccuracyORM": 0.29166667759418485, |
| "step": 1525, |
| "train_speed(iter/s)": 0.025 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 331.4, |
| "epoch": 0.6181818181818182, |
| "grad_norm": 2.251271699623951, |
| "kl": 0.015338134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.07724932432174683, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40833334177732467, |
| "reward_std": 0.39786076843738555, |
| "rewards/MultiModalAccuracyORM": 0.40833334177732467, |
| "step": 1530, |
| "train_speed(iter/s)": 0.024971 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 469.35, |
| "epoch": 0.6202020202020202, |
| "grad_norm": 3.517799255266591, |
| "kl": 0.021319580078125, |
| "learning_rate": 2e-07, |
| "loss": -0.042039293050765994, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2000000074505806, |
| "reward_std": 0.27122943103313446, |
| "rewards/MultiModalAccuracyORM": 0.2000000074505806, |
| "step": 1535, |
| "train_speed(iter/s)": 0.02492 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.3, |
| "epoch": 0.6222222222222222, |
| "grad_norm": 2.5032184616862736, |
| "kl": 0.023309326171875, |
| "learning_rate": 2e-07, |
| "loss": 0.004111546277999878, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.36037562787532806, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 1540, |
| "train_speed(iter/s)": 0.024886 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.65, |
| "epoch": 0.6242424242424243, |
| "grad_norm": 1.3788944987112297, |
| "kl": 0.018865966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.03875549137592316, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166666939854622, |
| "reward_std": 0.275529146194458, |
| "rewards/MultiModalAccuracyORM": 0.14166666939854622, |
| "step": 1545, |
| "train_speed(iter/s)": 0.02484 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 418.9, |
| "epoch": 0.6262626262626263, |
| "grad_norm": 1.8495513561932837, |
| "kl": 0.02667236328125, |
| "learning_rate": 2e-07, |
| "loss": 0.006523740291595459, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.3144540905952454, |
| "rewards/MultiModalAccuracyORM": 0.15833333656191825, |
| "step": 1550, |
| "train_speed(iter/s)": 0.024776 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.2, |
| "epoch": 0.6282828282828283, |
| "grad_norm": 1.753463603338966, |
| "kl": 0.030621337890625, |
| "learning_rate": 2e-07, |
| "loss": -0.08293852806091309, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000149011614, |
| "reward_std": 0.23083824515342713, |
| "rewards/MultiModalAccuracyORM": 0.25000000149011614, |
| "step": 1555, |
| "train_speed(iter/s)": 0.02475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.7, |
| "epoch": 0.6303030303030303, |
| "grad_norm": 2.663595112199716, |
| "kl": 0.0219970703125, |
| "learning_rate": 2e-07, |
| "loss": -0.002608485519886017, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000000447034835, |
| "reward_std": 0.27756678462028506, |
| "rewards/MultiModalAccuracyORM": 0.35000000447034835, |
| "step": 1560, |
| "train_speed(iter/s)": 0.024719 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.55, |
| "epoch": 0.6323232323232323, |
| "grad_norm": 1.803682568463378, |
| "kl": 0.02052001953125, |
| "learning_rate": 2e-07, |
| "loss": -0.031521540880203244, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333440124989, |
| "reward_std": 0.24935851097106934, |
| "rewards/MultiModalAccuracyORM": 0.2833333440124989, |
| "step": 1565, |
| "train_speed(iter/s)": 0.024694 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 464.5, |
| "epoch": 0.6343434343434343, |
| "grad_norm": 1.9551331787297712, |
| "kl": 0.012725830078125, |
| "learning_rate": 2e-07, |
| "loss": 0.016904991865158082, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000059604645, |
| "reward_std": 0.3863160490989685, |
| "rewards/MultiModalAccuracyORM": 0.2250000059604645, |
| "step": 1570, |
| "train_speed(iter/s)": 0.024625 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 280.85, |
| "epoch": 0.6363636363636364, |
| "grad_norm": 2.19696821448914, |
| "kl": 0.016156005859375, |
| "learning_rate": 2e-07, |
| "loss": 0.00793578326702118, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333333656191826, |
| "reward_std": 0.2260383188724518, |
| "rewards/MultiModalAccuracyORM": 0.38333333656191826, |
| "step": 1575, |
| "train_speed(iter/s)": 0.024598 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.35, |
| "epoch": 0.6383838383838384, |
| "grad_norm": 0.10124868688137513, |
| "kl": 0.016912841796875, |
| "learning_rate": 2e-07, |
| "loss": -0.007649339735507965, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666666865348815, |
| "reward_std": 0.23634997606277466, |
| "rewards/MultiModalAccuracyORM": 0.26666666865348815, |
| "step": 1580, |
| "train_speed(iter/s)": 0.024579 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.0, |
| "epoch": 0.6404040404040404, |
| "grad_norm": 1.6301877045933517, |
| "kl": 0.012744140625, |
| "learning_rate": 2e-07, |
| "loss": 0.013163220882415772, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000052154064, |
| "reward_std": 0.3906099498271942, |
| "rewards/MultiModalAccuracyORM": 0.3250000052154064, |
| "step": 1585, |
| "train_speed(iter/s)": 0.024565 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.05, |
| "epoch": 0.6424242424242425, |
| "grad_norm": 2.155746940066879, |
| "kl": 0.016387939453125, |
| "learning_rate": 2e-07, |
| "loss": -0.006454774737358093, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2333333373069763, |
| "reward_std": 0.2855865776538849, |
| "rewards/MultiModalAccuracyORM": 0.2333333373069763, |
| "step": 1590, |
| "train_speed(iter/s)": 0.024526 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.0, |
| "epoch": 0.6444444444444445, |
| "grad_norm": 2.831254989761031, |
| "kl": 0.0135467529296875, |
| "learning_rate": 2e-07, |
| "loss": 0.04445863664150238, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.28787843585014344, |
| "rewards/MultiModalAccuracyORM": 0.20000000149011612, |
| "step": 1595, |
| "train_speed(iter/s)": 0.024495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.45, |
| "epoch": 0.6464646464646465, |
| "grad_norm": 1.4752518445027274, |
| "kl": 0.017083740234375, |
| "learning_rate": 2e-07, |
| "loss": 0.03578461408615112, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666668131947516, |
| "reward_std": 0.33704385757446287, |
| "rewards/MultiModalAccuracyORM": 0.36666668131947516, |
| "step": 1600, |
| "train_speed(iter/s)": 0.024476 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 355.65, |
| "epoch": 0.6484848484848484, |
| "grad_norm": 0.9187218241799472, |
| "kl": 0.016937255859375, |
| "learning_rate": 2e-07, |
| "loss": 0.02192138433456421, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000111758709, |
| "reward_std": 0.3222196638584137, |
| "rewards/MultiModalAccuracyORM": 0.3500000111758709, |
| "step": 1605, |
| "train_speed(iter/s)": 0.024426 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.35, |
| "epoch": 0.6505050505050505, |
| "grad_norm": 1.7973159194566164, |
| "kl": 0.0144775390625, |
| "learning_rate": 2e-07, |
| "loss": 0.01784837543964386, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000298023223, |
| "reward_std": 0.2689731627702713, |
| "rewards/MultiModalAccuracyORM": 0.12500000298023223, |
| "step": 1610, |
| "train_speed(iter/s)": 0.024388 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 397.55, |
| "epoch": 0.6525252525252525, |
| "grad_norm": 2.0318711993448617, |
| "kl": 0.018182373046875, |
| "learning_rate": 2e-07, |
| "loss": -0.02051687240600586, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333380520344, |
| "reward_std": 0.261207589507103, |
| "rewards/MultiModalAccuracyORM": 0.4333333380520344, |
| "step": 1615, |
| "train_speed(iter/s)": 0.024346 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 339.3, |
| "epoch": 0.6545454545454545, |
| "grad_norm": 1.9030819605130962, |
| "kl": 0.0175079345703125, |
| "learning_rate": 2e-07, |
| "loss": 0.06623161435127259, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000596046446, |
| "reward_std": 0.24885829985141755, |
| "rewards/MultiModalAccuracyORM": 0.37500000596046446, |
| "step": 1620, |
| "train_speed(iter/s)": 0.024315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 280.4, |
| "epoch": 0.6565656565656566, |
| "grad_norm": 2.08045815446475, |
| "kl": 0.0169708251953125, |
| "learning_rate": 2e-07, |
| "loss": -0.013642898201942444, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000029802322, |
| "reward_std": 0.3378098726272583, |
| "rewards/MultiModalAccuracyORM": 0.2500000029802322, |
| "step": 1625, |
| "train_speed(iter/s)": 0.024289 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.75, |
| "epoch": 0.6585858585858586, |
| "grad_norm": 1.436661872799103, |
| "kl": 0.0193359375, |
| "learning_rate": 2e-07, |
| "loss": 0.02239292562007904, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333805203437, |
| "reward_std": 0.2629852324724197, |
| "rewards/MultiModalAccuracyORM": 0.15833333805203437, |
| "step": 1630, |
| "train_speed(iter/s)": 0.024248 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 270.4, |
| "epoch": 0.6606060606060606, |
| "grad_norm": 2.5008411774286494, |
| "kl": 0.020758056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.02127687931060791, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333432674408, |
| "reward_std": 0.3023863762617111, |
| "rewards/MultiModalAccuracyORM": 0.4083333432674408, |
| "step": 1635, |
| "train_speed(iter/s)": 0.024227 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.8, |
| "epoch": 0.6626262626262627, |
| "grad_norm": 2.6410537415459125, |
| "kl": 0.02030029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.05219934582710266, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667684912684, |
| "reward_std": 0.35006397068500517, |
| "rewards/MultiModalAccuracyORM": 0.41666667684912684, |
| "step": 1640, |
| "train_speed(iter/s)": 0.024199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.0, |
| "epoch": 0.6646464646464646, |
| "grad_norm": 2.4569826375450914, |
| "kl": 0.01795654296875, |
| "learning_rate": 2e-07, |
| "loss": 0.013086378574371338, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000001341104506, |
| "reward_std": 0.3337643891572952, |
| "rewards/MultiModalAccuracyORM": 0.45000001341104506, |
| "step": 1645, |
| "train_speed(iter/s)": 0.024168 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 304.8, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.9280627341583514, |
| "kl": 0.015191650390625, |
| "learning_rate": 2e-07, |
| "loss": 0.01907120943069458, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000447034836, |
| "reward_std": 0.30035116970539094, |
| "rewards/MultiModalAccuracyORM": 0.15000000447034836, |
| "step": 1650, |
| "train_speed(iter/s)": 0.024141 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.0, |
| "epoch": 0.6686868686868687, |
| "grad_norm": 2.6312715310589687, |
| "kl": 0.015863037109375, |
| "learning_rate": 2e-07, |
| "loss": -0.04063203632831573, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.25741389989852903, |
| "rewards/MultiModalAccuracyORM": 0.15833333656191825, |
| "step": 1655, |
| "train_speed(iter/s)": 0.024076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 394.05, |
| "epoch": 0.6707070707070707, |
| "grad_norm": 0.9566291807644657, |
| "kl": 0.015057373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.018163633346557618, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667610406875, |
| "reward_std": 0.28446817994117735, |
| "rewards/MultiModalAccuracyORM": 0.26666667610406875, |
| "step": 1660, |
| "train_speed(iter/s)": 0.024043 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 326.85, |
| "epoch": 0.6727272727272727, |
| "grad_norm": 1.9521868347750622, |
| "kl": 0.019769287109375, |
| "learning_rate": 2e-07, |
| "loss": -5.202591419219971e-05, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667535901069, |
| "reward_std": 0.23481498062610626, |
| "rewards/MultiModalAccuracyORM": 0.21666667535901069, |
| "step": 1665, |
| "train_speed(iter/s)": 0.024026 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 316.4, |
| "epoch": 0.6747474747474748, |
| "grad_norm": 2.1472683375029757, |
| "kl": 0.01842041015625, |
| "learning_rate": 2e-07, |
| "loss": 0.08016844987869262, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667461395264, |
| "reward_std": 0.29655990600585935, |
| "rewards/MultiModalAccuracyORM": 0.24166667461395264, |
| "step": 1670, |
| "train_speed(iter/s)": 0.024002 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.15, |
| "epoch": 0.6767676767676768, |
| "grad_norm": 2.136669782149022, |
| "kl": 0.012603759765625, |
| "learning_rate": 2e-07, |
| "loss": 0.03559441566467285, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333334103226662, |
| "reward_std": 0.31266487538814547, |
| "rewards/MultiModalAccuracyORM": 0.18333334103226662, |
| "step": 1675, |
| "train_speed(iter/s)": 0.023983 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 349.7, |
| "epoch": 0.6787878787878788, |
| "grad_norm": 2.5120224393696056, |
| "kl": 0.033984375, |
| "learning_rate": 2e-07, |
| "loss": -0.02109343409538269, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333410322666, |
| "reward_std": 0.2629852324724197, |
| "rewards/MultiModalAccuracyORM": 0.2083333410322666, |
| "step": 1680, |
| "train_speed(iter/s)": 0.02395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 224.85, |
| "epoch": 0.6808080808080809, |
| "grad_norm": 2.7291188101039268, |
| "kl": 0.0185638427734375, |
| "learning_rate": 2e-07, |
| "loss": 0.06400806307792664, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.441666679084301, |
| "reward_std": 0.3586460083723068, |
| "rewards/MultiModalAccuracyORM": 0.441666679084301, |
| "step": 1685, |
| "train_speed(iter/s)": 0.023931 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 204.25, |
| "epoch": 0.6828282828282828, |
| "grad_norm": 2.473418035792826, |
| "kl": 0.03394775390625, |
| "learning_rate": 2e-07, |
| "loss": 0.042749062180519104, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000044703484, |
| "reward_std": 0.30718872845172884, |
| "rewards/MultiModalAccuracyORM": 0.3250000044703484, |
| "step": 1690, |
| "train_speed(iter/s)": 0.023921 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.45, |
| "epoch": 0.6848484848484848, |
| "grad_norm": 1.4363881715878042, |
| "kl": 0.023870849609375, |
| "learning_rate": 2e-07, |
| "loss": 0.007241478562355042, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000022351742, |
| "reward_std": 0.3244373768568039, |
| "rewards/MultiModalAccuracyORM": 0.3500000022351742, |
| "step": 1695, |
| "train_speed(iter/s)": 0.023909 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 315.0, |
| "epoch": 0.6868686868686869, |
| "grad_norm": 2.953319073134284, |
| "kl": 0.023052978515625, |
| "learning_rate": 2e-07, |
| "loss": -0.010269761085510254, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.3144540905952454, |
| "rewards/MultiModalAccuracyORM": 0.32500000596046447, |
| "step": 1700, |
| "train_speed(iter/s)": 0.023894 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 281.35, |
| "epoch": 0.6888888888888889, |
| "grad_norm": 2.565868939994401, |
| "kl": 0.02255859375, |
| "learning_rate": 2e-07, |
| "loss": 0.018953490257263183, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333417773247, |
| "reward_std": 0.30840655863285066, |
| "rewards/MultiModalAccuracyORM": 0.3583333417773247, |
| "step": 1705, |
| "train_speed(iter/s)": 0.023885 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.9, |
| "epoch": 0.6909090909090909, |
| "grad_norm": 0.6694533298035624, |
| "kl": 0.01932373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.008337923884391784, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667535901069, |
| "reward_std": 0.2652415007352829, |
| "rewards/MultiModalAccuracyORM": 0.21666667535901069, |
| "step": 1710, |
| "train_speed(iter/s)": 0.02387 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 275.95, |
| "epoch": 0.692929292929293, |
| "grad_norm": 1.567189433294113, |
| "kl": 0.030279541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.03896563053131104, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667759418485, |
| "reward_std": 0.35563530325889586, |
| "rewards/MultiModalAccuracyORM": 0.41666667759418485, |
| "step": 1715, |
| "train_speed(iter/s)": 0.023857 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 263.45, |
| "epoch": 0.694949494949495, |
| "grad_norm": 1.8167696383064045, |
| "kl": 0.0214141845703125, |
| "learning_rate": 2e-07, |
| "loss": 0.020650827884674074, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333484828472, |
| "reward_std": 0.39936017990112305, |
| "rewards/MultiModalAccuracyORM": 0.4333333484828472, |
| "step": 1720, |
| "train_speed(iter/s)": 0.023843 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.55, |
| "epoch": 0.696969696969697, |
| "grad_norm": 2.213186558232037, |
| "kl": 0.0275634765625, |
| "learning_rate": 2e-07, |
| "loss": -0.008746334910392761, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.341666679084301, |
| "reward_std": 0.35490245223045347, |
| "rewards/MultiModalAccuracyORM": 0.341666679084301, |
| "step": 1725, |
| "train_speed(iter/s)": 0.023835 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.9, |
| "epoch": 0.6989898989898989, |
| "grad_norm": 2.601045176615316, |
| "kl": 0.021826171875, |
| "learning_rate": 2e-07, |
| "loss": -0.03737230598926544, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667461395265, |
| "reward_std": 0.38179769814014436, |
| "rewards/MultiModalAccuracyORM": 0.31666667461395265, |
| "step": 1730, |
| "train_speed(iter/s)": 0.023822 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.3, |
| "epoch": 0.701010101010101, |
| "grad_norm": 0.9407841462948962, |
| "kl": 0.0240234375, |
| "learning_rate": 2e-07, |
| "loss": -0.0031855762004852294, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000089406967, |
| "reward_std": 0.2511385798454285, |
| "rewards/MultiModalAccuracyORM": 0.2750000089406967, |
| "step": 1735, |
| "train_speed(iter/s)": 0.023807 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 262.95, |
| "epoch": 0.703030303030303, |
| "grad_norm": 2.6759259468484413, |
| "kl": 0.0215087890625, |
| "learning_rate": 2e-07, |
| "loss": 0.025629484653472902, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.34166667312383653, |
| "reward_std": 0.3390218883752823, |
| "rewards/MultiModalAccuracyORM": 0.34166667312383653, |
| "step": 1740, |
| "train_speed(iter/s)": 0.023819 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 280.85, |
| "epoch": 0.705050505050505, |
| "grad_norm": 1.6215662631256935, |
| "kl": 0.043084716796875, |
| "learning_rate": 2e-07, |
| "loss": 0.01873619556427002, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.23631438612937927, |
| "rewards/MultiModalAccuracyORM": 0.3083333395421505, |
| "step": 1745, |
| "train_speed(iter/s)": 0.023814 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 3.313730122510265, |
| "learning_rate": 2e-07, |
| "loss": -0.041856271028518674, |
| "memory(GiB)": 87.45, |
| "step": 1750, |
| "train_speed(iter/s)": 0.023773 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 318.58167419433596, |
| "eval_kl": 0.0221929931640625, |
| "eval_loss": 0.0349855050444603, |
| "eval_response_clip_ratio": 0.001666666716337204, |
| "eval_reward": 0.2950000064074993, |
| "eval_reward_std": 0.3137217426300049, |
| "eval_rewards/MultiModalAccuracyORM": 0.2950000064074993, |
| "eval_runtime": 782.5117, |
| "eval_samples_per_second": 0.064, |
| "eval_steps_per_second": 0.006, |
| "step": 1750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.775, |
| "epoch": 0.7090909090909091, |
| "grad_norm": 1.446054468361364, |
| "kl": 0.0215576171875, |
| "learning_rate": 2e-07, |
| "loss": 0.013345304131507873, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.3380433991551399, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 1755, |
| "train_speed(iter/s)": 0.023419 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.45, |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.3947630704883345, |
| "kl": 0.018634033203125, |
| "learning_rate": 2e-07, |
| "loss": 0.010007500648498535, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000521540643, |
| "reward_std": 0.27148365676403047, |
| "rewards/MultiModalAccuracyORM": 0.17500000521540643, |
| "step": 1760, |
| "train_speed(iter/s)": 0.023454 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 283.5, |
| "epoch": 0.7131313131313132, |
| "grad_norm": 2.218781010019711, |
| "kl": 0.021832275390625, |
| "learning_rate": 2e-07, |
| "loss": -0.013157431781291962, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.2652770906686783, |
| "rewards/MultiModalAccuracyORM": 0.15833333656191825, |
| "step": 1765, |
| "train_speed(iter/s)": 0.023491 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 256.75, |
| "epoch": 0.7151515151515152, |
| "grad_norm": 1.7430710535513718, |
| "kl": 0.01793212890625, |
| "learning_rate": 2e-07, |
| "loss": 0.021530145406723024, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667610406875, |
| "reward_std": 0.3066769391298294, |
| "rewards/MultiModalAccuracyORM": 0.26666667610406875, |
| "step": 1770, |
| "train_speed(iter/s)": 0.023528 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 265.35, |
| "epoch": 0.7171717171717171, |
| "grad_norm": 1.7339756470338048, |
| "kl": 0.014569091796875, |
| "learning_rate": 2e-07, |
| "loss": -0.058446085453033446, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.2820172876119614, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 1775, |
| "train_speed(iter/s)": 0.023563 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.0, |
| "epoch": 0.7191919191919192, |
| "grad_norm": 1.6384172396752068, |
| "kl": 0.0145233154296875, |
| "learning_rate": 2e-07, |
| "loss": -0.00234740674495697, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.33333334177732465, |
| "reward_std": 0.3890485167503357, |
| "rewards/MultiModalAccuracyORM": 0.33333334177732465, |
| "step": 1780, |
| "train_speed(iter/s)": 0.02359 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 391.6, |
| "epoch": 0.7212121212121212, |
| "grad_norm": 2.6878660022854333, |
| "kl": 0.016748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.03554516434669495, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333395421505, |
| "reward_std": 0.35974039435386657, |
| "rewards/MultiModalAccuracyORM": 0.2833333395421505, |
| "step": 1785, |
| "train_speed(iter/s)": 0.023622 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.25, |
| "epoch": 0.7232323232323232, |
| "grad_norm": 2.4324428426946834, |
| "kl": 0.0128204345703125, |
| "learning_rate": 2e-07, |
| "loss": -0.047456872463226316, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333730697633, |
| "reward_std": 0.2970361053943634, |
| "rewards/MultiModalAccuracyORM": 0.25833333730697633, |
| "step": 1790, |
| "train_speed(iter/s)": 0.023655 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.65, |
| "epoch": 0.7252525252525253, |
| "grad_norm": 1.8618904482502028, |
| "kl": 0.0149169921875, |
| "learning_rate": 2e-07, |
| "loss": 0.009033694863319397, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333387970924, |
| "reward_std": 0.21750431060791015, |
| "rewards/MultiModalAccuracyORM": 0.2583333387970924, |
| "step": 1795, |
| "train_speed(iter/s)": 0.023686 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.05, |
| "epoch": 0.7272727272727273, |
| "grad_norm": 3.36471551001556, |
| "kl": 0.02044677734375, |
| "learning_rate": 2e-07, |
| "loss": 0.010516098141670227, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666731238365, |
| "reward_std": 0.21218962371349334, |
| "rewards/MultiModalAccuracyORM": 0.3166666731238365, |
| "step": 1800, |
| "train_speed(iter/s)": 0.023721 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.05, |
| "epoch": 0.7292929292929293, |
| "grad_norm": 3.723751882855137, |
| "kl": 0.023046875, |
| "learning_rate": 2e-07, |
| "loss": -0.02001919746398926, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333447575569, |
| "reward_std": 0.28128686249256135, |
| "rewards/MultiModalAccuracyORM": 0.4083333447575569, |
| "step": 1805, |
| "train_speed(iter/s)": 0.023755 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 335.35, |
| "epoch": 0.7313131313131314, |
| "grad_norm": 54.701999328620005, |
| "kl": 0.02723388671875, |
| "learning_rate": 2e-07, |
| "loss": 0.03721327781677246, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666753590107, |
| "reward_std": 0.2910481750965118, |
| "rewards/MultiModalAccuracyORM": 0.2416666753590107, |
| "step": 1810, |
| "train_speed(iter/s)": 0.02379 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 225.45, |
| "epoch": 0.7333333333333333, |
| "grad_norm": 3.0855092667576733, |
| "kl": 0.015704345703125, |
| "learning_rate": 2e-07, |
| "loss": -0.037659955024719236, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667759418486, |
| "reward_std": 0.36648276150226594, |
| "rewards/MultiModalAccuracyORM": 0.36666667759418486, |
| "step": 1815, |
| "train_speed(iter/s)": 0.023829 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 306.3, |
| "epoch": 0.7353535353535353, |
| "grad_norm": 2.1896027058768217, |
| "kl": 0.01336669921875, |
| "learning_rate": 2e-07, |
| "loss": 0.02186403125524521, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666753590107, |
| "reward_std": 0.2956440091133118, |
| "rewards/MultiModalAccuracyORM": 0.4416666753590107, |
| "step": 1820, |
| "train_speed(iter/s)": 0.023865 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.95, |
| "epoch": 0.7373737373737373, |
| "grad_norm": 1.540468825830471, |
| "kl": 0.010992431640625, |
| "learning_rate": 2e-07, |
| "loss": 0.03888830542564392, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1583333395421505, |
| "reward_std": 0.21368902921676636, |
| "rewards/MultiModalAccuracyORM": 0.1583333395421505, |
| "step": 1825, |
| "train_speed(iter/s)": 0.023899 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.15, |
| "epoch": 0.7393939393939394, |
| "grad_norm": 49.26742721312377, |
| "kl": 0.0157135009765625, |
| "learning_rate": 2e-07, |
| "loss": -0.0031795650720596313, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1333333395421505, |
| "reward_std": 0.2736803233623505, |
| "rewards/MultiModalAccuracyORM": 0.1333333395421505, |
| "step": 1830, |
| "train_speed(iter/s)": 0.023929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.0, |
| "epoch": 0.7414141414141414, |
| "grad_norm": 1.2425141205561836, |
| "kl": 0.0211181640625, |
| "learning_rate": 2e-07, |
| "loss": -0.01690070778131485, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.33905747830867766, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 1835, |
| "train_speed(iter/s)": 0.023961 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 377.15, |
| "epoch": 0.7434343434343434, |
| "grad_norm": 2.8910783603707144, |
| "kl": 0.0198638916015625, |
| "learning_rate": 2e-07, |
| "loss": 0.06207960844039917, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000001043081285, |
| "reward_std": 0.38306058645248414, |
| "rewards/MultiModalAccuracyORM": 0.40000001043081285, |
| "step": 1840, |
| "train_speed(iter/s)": 0.023987 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 398.35, |
| "epoch": 0.7454545454545455, |
| "grad_norm": 14.235626745032626, |
| "kl": 0.019775390625, |
| "learning_rate": 2e-07, |
| "loss": 0.037658247351646426, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666666865348817, |
| "reward_std": 0.12708649039268494, |
| "rewards/MultiModalAccuracyORM": 0.16666666865348817, |
| "step": 1845, |
| "train_speed(iter/s)": 0.024018 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.4, |
| "epoch": 0.7474747474747475, |
| "grad_norm": 1.833635434555557, |
| "kl": 0.018505859375, |
| "learning_rate": 2e-07, |
| "loss": -0.026553609967231752, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3750000111758709, |
| "reward_std": 0.34710129499435427, |
| "rewards/MultiModalAccuracyORM": 0.3750000111758709, |
| "step": 1850, |
| "train_speed(iter/s)": 0.024051 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 394.6, |
| "epoch": 0.7494949494949495, |
| "grad_norm": 1.825594490175896, |
| "kl": 0.02091064453125, |
| "learning_rate": 2e-07, |
| "loss": 0.02868058383464813, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334550261495, |
| "reward_std": 0.3127244710922241, |
| "rewards/MultiModalAccuracyORM": 0.33333334550261495, |
| "step": 1855, |
| "train_speed(iter/s)": 0.024084 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.45, |
| "epoch": 0.7515151515151515, |
| "grad_norm": 1.3722283938123239, |
| "kl": 0.023919677734375, |
| "learning_rate": 2e-07, |
| "loss": 0.017566892504692077, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166666865348815, |
| "reward_std": 0.32900004684925077, |
| "rewards/MultiModalAccuracyORM": 0.14166666865348815, |
| "step": 1860, |
| "train_speed(iter/s)": 0.024119 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.25, |
| "epoch": 0.7535353535353535, |
| "grad_norm": 3.3603602877653964, |
| "kl": 0.023779296875, |
| "learning_rate": 2e-07, |
| "loss": 0.051629495620727536, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334177732465, |
| "reward_std": 0.4036242991685867, |
| "rewards/MultiModalAccuracyORM": 0.33333334177732465, |
| "step": 1865, |
| "train_speed(iter/s)": 0.02415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 306.4, |
| "epoch": 0.7555555555555555, |
| "grad_norm": 4.690429815238561, |
| "kl": 0.0260162353515625, |
| "learning_rate": 2e-07, |
| "loss": -0.004315692186355591, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000029802322, |
| "reward_std": 0.2940108567476273, |
| "rewards/MultiModalAccuracyORM": 0.2500000029802322, |
| "step": 1870, |
| "train_speed(iter/s)": 0.024182 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 274.45, |
| "epoch": 0.7575757575757576, |
| "grad_norm": 2.7051519330762646, |
| "kl": 0.0303466796875, |
| "learning_rate": 2e-07, |
| "loss": -0.008211909234523774, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.3237069517374039, |
| "rewards/MultiModalAccuracyORM": 0.25833334103226663, |
| "step": 1875, |
| "train_speed(iter/s)": 0.024217 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.5, |
| "epoch": 0.7595959595959596, |
| "grad_norm": 2.8417211154013895, |
| "kl": 0.02593994140625, |
| "learning_rate": 2e-07, |
| "loss": 0.061132901906967164, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000670552255, |
| "reward_std": 0.40261563658714294, |
| "rewards/MultiModalAccuracyORM": 0.27500000670552255, |
| "step": 1880, |
| "train_speed(iter/s)": 0.024247 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 396.45, |
| "epoch": 0.7616161616161616, |
| "grad_norm": 2.730755662335053, |
| "kl": 0.026220703125, |
| "learning_rate": 2e-07, |
| "loss": 0.036236304044723514, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666731238365, |
| "reward_std": 0.3101543754339218, |
| "rewards/MultiModalAccuracyORM": 0.1666666731238365, |
| "step": 1885, |
| "train_speed(iter/s)": 0.024279 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 275.0, |
| "epoch": 0.7636363636363637, |
| "grad_norm": 1.777471986992103, |
| "kl": 0.025811767578125, |
| "learning_rate": 2e-07, |
| "loss": 0.010323920845985412, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666701436043, |
| "reward_std": 0.2506715327501297, |
| "rewards/MultiModalAccuracyORM": 0.3916666701436043, |
| "step": 1890, |
| "train_speed(iter/s)": 0.024315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 428.5, |
| "epoch": 0.7656565656565657, |
| "grad_norm": 0.13037300867268706, |
| "kl": 0.030450439453125, |
| "learning_rate": 2e-07, |
| "loss": 0.0042250391095876695, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000001043081286, |
| "reward_std": 0.3182337760925293, |
| "rewards/MultiModalAccuracyORM": 0.35000001043081286, |
| "step": 1895, |
| "train_speed(iter/s)": 0.024345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 329.7, |
| "epoch": 0.7676767676767676, |
| "grad_norm": 1.7511437916198835, |
| "kl": 0.016363525390625, |
| "learning_rate": 2e-07, |
| "loss": 0.006176537275314331, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.44166667461395265, |
| "reward_std": 0.2988493382930756, |
| "rewards/MultiModalAccuracyORM": 0.44166667461395265, |
| "step": 1900, |
| "train_speed(iter/s)": 0.024374 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 262.25, |
| "epoch": 0.7696969696969697, |
| "grad_norm": 2.6784748457723992, |
| "kl": 0.026043701171875, |
| "learning_rate": 2e-07, |
| "loss": -0.0650195300579071, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666753590107, |
| "reward_std": 0.4098664551973343, |
| "rewards/MultiModalAccuracyORM": 0.4416666753590107, |
| "step": 1905, |
| "train_speed(iter/s)": 0.024412 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.75, |
| "epoch": 0.7717171717171717, |
| "grad_norm": 2.0646305839430648, |
| "kl": 0.027471923828125, |
| "learning_rate": 2e-07, |
| "loss": 0.023633481562137605, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833334401249884, |
| "reward_std": 0.375223833322525, |
| "rewards/MultiModalAccuracyORM": 0.30833334401249884, |
| "step": 1910, |
| "train_speed(iter/s)": 0.024446 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 312.0, |
| "epoch": 0.7737373737373737, |
| "grad_norm": 1.9430903927913294, |
| "kl": 0.018585205078125, |
| "learning_rate": 2e-07, |
| "loss": -0.023164969682693482, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.3330695480108261, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 1915, |
| "train_speed(iter/s)": 0.024483 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.55, |
| "epoch": 0.7757575757575758, |
| "grad_norm": 1.2487710271189274, |
| "kl": 0.0145263671875, |
| "learning_rate": 2e-07, |
| "loss": 0.014984607696533203, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334028720856, |
| "reward_std": 0.2784802496433258, |
| "rewards/MultiModalAccuracyORM": 0.38333334028720856, |
| "step": 1920, |
| "train_speed(iter/s)": 0.024514 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 301.65, |
| "epoch": 0.7777777777777778, |
| "grad_norm": 3.397172729657377, |
| "kl": 0.025823974609375, |
| "learning_rate": 2e-07, |
| "loss": 0.010728538036346436, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000001341104506, |
| "reward_std": 0.36237767040729524, |
| "rewards/MultiModalAccuracyORM": 0.45000001341104506, |
| "step": 1925, |
| "train_speed(iter/s)": 0.024547 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.9, |
| "epoch": 0.7797979797979798, |
| "grad_norm": 2.445242624274772, |
| "kl": 0.02085418701171875, |
| "learning_rate": 2e-07, |
| "loss": 0.0506191611289978, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.366666679084301, |
| "reward_std": 0.3425410449504852, |
| "rewards/MultiModalAccuracyORM": 0.366666679084301, |
| "step": 1930, |
| "train_speed(iter/s)": 0.024581 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.95, |
| "epoch": 0.7818181818181819, |
| "grad_norm": 2.2267041732312953, |
| "kl": 0.0191650390625, |
| "learning_rate": 2e-07, |
| "loss": 0.07460187673568726, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1750000037252903, |
| "reward_std": 0.27998208105564115, |
| "rewards/MultiModalAccuracyORM": 0.1750000037252903, |
| "step": 1935, |
| "train_speed(iter/s)": 0.024609 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.15, |
| "epoch": 0.7838383838383839, |
| "grad_norm": 0.08307319969608204, |
| "kl": 0.01834716796875, |
| "learning_rate": 2e-07, |
| "loss": 0.01801389306783676, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334028720856, |
| "reward_std": 0.2292436480522156, |
| "rewards/MultiModalAccuracyORM": 0.23333334028720856, |
| "step": 1940, |
| "train_speed(iter/s)": 0.024633 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.25, |
| "epoch": 0.7858585858585858, |
| "grad_norm": 2.4956737169852876, |
| "kl": 0.0243896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.02604297399520874, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666768491268, |
| "reward_std": 0.23860624432563782, |
| "rewards/MultiModalAccuracyORM": 0.4416666768491268, |
| "step": 1945, |
| "train_speed(iter/s)": 0.024666 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.6, |
| "epoch": 0.7878787878787878, |
| "grad_norm": 1.412421381873315, |
| "kl": 0.03074951171875, |
| "learning_rate": 2e-07, |
| "loss": -0.008066686987876891, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.3619014710187912, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 1950, |
| "train_speed(iter/s)": 0.0247 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 316.4, |
| "epoch": 0.7898989898989899, |
| "grad_norm": 2.581974028906461, |
| "kl": 0.0264404296875, |
| "learning_rate": 2e-07, |
| "loss": 0.0021781913936138155, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.6333333432674408, |
| "reward_std": 0.34636789858341216, |
| "rewards/MultiModalAccuracyORM": 0.6333333432674408, |
| "step": 1955, |
| "train_speed(iter/s)": 0.024735 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.8, |
| "epoch": 0.7919191919191919, |
| "grad_norm": 2.7977079078012546, |
| "kl": 0.030804443359375, |
| "learning_rate": 2e-07, |
| "loss": 0.028843042254447938, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.18488111793994905, |
| "rewards/MultiModalAccuracyORM": 0.23333333879709245, |
| "step": 1960, |
| "train_speed(iter/s)": 0.02477 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.4, |
| "epoch": 0.793939393939394, |
| "grad_norm": 2.3766146998216606, |
| "kl": 0.029986572265625, |
| "learning_rate": 2e-07, |
| "loss": 0.01644158363342285, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5250000111758709, |
| "reward_std": 0.3782962501049042, |
| "rewards/MultiModalAccuracyORM": 0.5250000111758709, |
| "step": 1965, |
| "train_speed(iter/s)": 0.024803 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.2, |
| "epoch": 0.795959595959596, |
| "grad_norm": 1.6454459000922825, |
| "kl": 0.03331298828125, |
| "learning_rate": 2e-07, |
| "loss": 0.04098441600799561, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666805744171, |
| "reward_std": 0.31651573479175565, |
| "rewards/MultiModalAccuracyORM": 0.3916666805744171, |
| "step": 1970, |
| "train_speed(iter/s)": 0.024833 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.5, |
| "epoch": 0.797979797979798, |
| "grad_norm": 2.676941712540541, |
| "kl": 0.033160400390625, |
| "learning_rate": 2e-07, |
| "loss": -0.06822603344917297, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333395421505, |
| "reward_std": 0.26591232419013977, |
| "rewards/MultiModalAccuracyORM": 0.4083333395421505, |
| "step": 1975, |
| "train_speed(iter/s)": 0.024862 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.05, |
| "epoch": 0.8, |
| "grad_norm": 2.6654647292288565, |
| "kl": 0.03338623046875, |
| "learning_rate": 2e-07, |
| "loss": 0.018979550898075105, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333410322666, |
| "reward_std": 0.2988493382930756, |
| "rewards/MultiModalAccuracyORM": 0.3083333410322666, |
| "step": 1980, |
| "train_speed(iter/s)": 0.024892 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.9, |
| "epoch": 0.802020202020202, |
| "grad_norm": 1.2773941729876779, |
| "kl": 0.02757568359375, |
| "learning_rate": 2e-07, |
| "loss": 0.0032975614070892335, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666716337204, |
| "reward_std": 0.21999078691005708, |
| "rewards/MultiModalAccuracyORM": 0.3666666716337204, |
| "step": 1985, |
| "train_speed(iter/s)": 0.024918 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 318.45, |
| "epoch": 0.804040404040404, |
| "grad_norm": 3.249804741680811, |
| "kl": 0.0233734130859375, |
| "learning_rate": 2e-07, |
| "loss": -0.0009274959564208984, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000447034837, |
| "reward_std": 0.3111390322446823, |
| "rewards/MultiModalAccuracyORM": 0.25000000447034837, |
| "step": 1990, |
| "train_speed(iter/s)": 0.024952 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.3, |
| "epoch": 0.806060606060606, |
| "grad_norm": 1.6358353140611315, |
| "kl": 0.02435302734375, |
| "learning_rate": 2e-07, |
| "loss": 0.01845797598361969, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333380520344, |
| "reward_std": 0.31345489621162415, |
| "rewards/MultiModalAccuracyORM": 0.3083333380520344, |
| "step": 1995, |
| "train_speed(iter/s)": 0.024983 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 2.5769756858186366, |
| "learning_rate": 2e-07, |
| "loss": -0.03718583881855011, |
| "memory(GiB)": 87.45, |
| "step": 2000, |
| "train_speed(iter/s)": 0.025016 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 323.9533418273926, |
| "eval_kl": 0.0281341552734375, |
| "eval_loss": 0.006039996165782213, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.318333340883255, |
| "eval_reward_std": 0.32694393634796143, |
| "eval_rewards/MultiModalAccuracyORM": 0.318333340883255, |
| "eval_runtime": 462.0456, |
| "eval_samples_per_second": 0.108, |
| "eval_steps_per_second": 0.011, |
| "step": 2000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.125, |
| "epoch": 0.8101010101010101, |
| "grad_norm": 1.7033276169087128, |
| "kl": 0.02674102783203125, |
| "learning_rate": 2e-07, |
| "loss": 0.03609513640403748, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23750001043081284, |
| "reward_std": 0.24687736183404924, |
| "rewards/MultiModalAccuracyORM": 0.23750001043081284, |
| "step": 2005, |
| "train_speed(iter/s)": 0.024793 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.55, |
| "epoch": 0.8121212121212121, |
| "grad_norm": 1.77522203951707, |
| "kl": 0.0292724609375, |
| "learning_rate": 2e-07, |
| "loss": 0.01515505015850067, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000059604645, |
| "reward_std": 0.38405978083610537, |
| "rewards/MultiModalAccuracyORM": 0.3500000059604645, |
| "step": 2010, |
| "train_speed(iter/s)": 0.024823 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 288.5, |
| "epoch": 0.8141414141414142, |
| "grad_norm": 2.047124696336966, |
| "kl": 0.02886962890625, |
| "learning_rate": 2e-07, |
| "loss": -0.056891226768493654, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.350000012665987, |
| "reward_std": 0.3127244710922241, |
| "rewards/MultiModalAccuracyORM": 0.350000012665987, |
| "step": 2015, |
| "train_speed(iter/s)": 0.024857 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 273.0, |
| "epoch": 0.8161616161616162, |
| "grad_norm": 2.933718360724764, |
| "kl": 0.0226837158203125, |
| "learning_rate": 2e-07, |
| "loss": 0.04815356135368347, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.3597048044204712, |
| "rewards/MultiModalAccuracyORM": 0.3916666753590107, |
| "step": 2020, |
| "train_speed(iter/s)": 0.024891 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 334.85, |
| "epoch": 0.8181818181818182, |
| "grad_norm": 2.3099689560601595, |
| "kl": 0.015521240234375, |
| "learning_rate": 2e-07, |
| "loss": 0.00659940093755722, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4250000067055225, |
| "reward_std": 0.2574163258075714, |
| "rewards/MultiModalAccuracyORM": 0.4250000067055225, |
| "step": 2025, |
| "train_speed(iter/s)": 0.024922 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.5, |
| "epoch": 0.8202020202020202, |
| "grad_norm": 2.5439305675732165, |
| "kl": 0.019085693359375, |
| "learning_rate": 2e-07, |
| "loss": 0.0326183021068573, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3750000074505806, |
| "reward_std": 0.31040860116481783, |
| "rewards/MultiModalAccuracyORM": 0.3750000074505806, |
| "step": 2030, |
| "train_speed(iter/s)": 0.024949 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 431.5, |
| "epoch": 0.8222222222222222, |
| "grad_norm": 3.2829060035742557, |
| "kl": 0.023626708984375, |
| "learning_rate": 2e-07, |
| "loss": 0.015071746706962586, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666693985462, |
| "reward_std": 0.205923455953598, |
| "rewards/MultiModalAccuracyORM": 0.2666666693985462, |
| "step": 2035, |
| "train_speed(iter/s)": 0.024973 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 525.55, |
| "epoch": 0.8242424242424242, |
| "grad_norm": 2.658698100364113, |
| "kl": 0.0230712890625, |
| "learning_rate": 2e-07, |
| "loss": 0.013616405427455902, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333879709244, |
| "reward_std": 0.34936913549900056, |
| "rewards/MultiModalAccuracyORM": 0.28333333879709244, |
| "step": 2040, |
| "train_speed(iter/s)": 0.024998 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 238.15, |
| "epoch": 0.8262626262626263, |
| "grad_norm": 2.342715529046246, |
| "kl": 0.029901123046875, |
| "learning_rate": 2e-07, |
| "loss": 0.037117105722427365, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.36670139729976653, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 2045, |
| "train_speed(iter/s)": 0.025033 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 334.45, |
| "epoch": 0.8282828282828283, |
| "grad_norm": 0.9452733042514408, |
| "kl": 0.025860595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.03209388256072998, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000074505805, |
| "reward_std": 0.26750934720039365, |
| "rewards/MultiModalAccuracyORM": 0.22500000074505805, |
| "step": 2050, |
| "train_speed(iter/s)": 0.025068 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.15, |
| "epoch": 0.8303030303030303, |
| "grad_norm": 2.136815117405037, |
| "kl": 0.0298553466796875, |
| "learning_rate": 2e-07, |
| "loss": 0.04463410079479217, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666753590107, |
| "reward_std": 0.4307381808757782, |
| "rewards/MultiModalAccuracyORM": 0.3416666753590107, |
| "step": 2055, |
| "train_speed(iter/s)": 0.025094 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 437.85, |
| "epoch": 0.8323232323232324, |
| "grad_norm": 1.7941689466428354, |
| "kl": 0.018414306640625, |
| "learning_rate": 2e-07, |
| "loss": -0.013085222244262696, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333333432674406, |
| "reward_std": 0.27756677865982055, |
| "rewards/MultiModalAccuracyORM": 0.33333333432674406, |
| "step": 2060, |
| "train_speed(iter/s)": 0.025121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.6, |
| "epoch": 0.8343434343434344, |
| "grad_norm": 2.741809894885581, |
| "kl": 0.0217803955078125, |
| "learning_rate": 2e-07, |
| "loss": 0.032400667667388916, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.3207202583551407, |
| "rewards/MultiModalAccuracyORM": 0.2083333395421505, |
| "step": 2065, |
| "train_speed(iter/s)": 0.025146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.7, |
| "epoch": 0.8363636363636363, |
| "grad_norm": 1.5317649365927353, |
| "kl": 0.02591552734375, |
| "learning_rate": 2e-07, |
| "loss": 0.026116135716438293, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666746139526, |
| "reward_std": 0.3315081149339676, |
| "rewards/MultiModalAccuracyORM": 0.2916666746139526, |
| "step": 2070, |
| "train_speed(iter/s)": 0.02517 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 298.3, |
| "epoch": 0.8383838383838383, |
| "grad_norm": 2.2493040161672164, |
| "kl": 0.023297119140625, |
| "learning_rate": 2e-07, |
| "loss": 0.011263298988342284, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666723787785, |
| "reward_std": 0.27122943103313446, |
| "rewards/MultiModalAccuracyORM": 0.3666666723787785, |
| "step": 2075, |
| "train_speed(iter/s)": 0.025195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 327.35, |
| "epoch": 0.8404040404040404, |
| "grad_norm": 1.6803752878651963, |
| "kl": 0.05001220703125, |
| "learning_rate": 2e-07, |
| "loss": 0.021441753208637237, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333387970924, |
| "reward_std": 0.3531844109296799, |
| "rewards/MultiModalAccuracyORM": 0.3833333387970924, |
| "step": 2080, |
| "train_speed(iter/s)": 0.025225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.15, |
| "epoch": 0.8424242424242424, |
| "grad_norm": 1.980173450589181, |
| "kl": 0.0163818359375, |
| "learning_rate": 2e-07, |
| "loss": 0.013161852955818176, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.22625695466995238, |
| "rewards/MultiModalAccuracyORM": 0.21666667088866234, |
| "step": 2085, |
| "train_speed(iter/s)": 0.025254 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.1, |
| "epoch": 0.8444444444444444, |
| "grad_norm": 1.0010632093343366, |
| "kl": 0.017938232421875, |
| "learning_rate": 2e-07, |
| "loss": -0.0012541890144348144, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000223517418, |
| "reward_std": 0.2916341096162796, |
| "rewards/MultiModalAccuracyORM": 0.15000000223517418, |
| "step": 2090, |
| "train_speed(iter/s)": 0.025273 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 326.0, |
| "epoch": 0.8464646464646465, |
| "grad_norm": 1.8276205217385537, |
| "kl": 0.0211029052734375, |
| "learning_rate": 2e-07, |
| "loss": 0.018240103125572206, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666708886623, |
| "reward_std": 0.35748412609100344, |
| "rewards/MultiModalAccuracyORM": 0.2916666708886623, |
| "step": 2095, |
| "train_speed(iter/s)": 0.0253 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.3, |
| "epoch": 0.8484848484848485, |
| "grad_norm": 2.25183174936328, |
| "kl": 0.0171142578125, |
| "learning_rate": 2e-07, |
| "loss": -0.0015764832496643066, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666775941849, |
| "reward_std": 0.2782260239124298, |
| "rewards/MultiModalAccuracyORM": 0.3916666775941849, |
| "step": 2100, |
| "train_speed(iter/s)": 0.02533 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.65, |
| "epoch": 0.8505050505050505, |
| "grad_norm": 2.301476369720727, |
| "kl": 0.02381591796875, |
| "learning_rate": 2e-07, |
| "loss": 0.02723083198070526, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.2500000037252903, |
| "reward_std": 0.3780420243740082, |
| "rewards/MultiModalAccuracyORM": 0.2500000037252903, |
| "step": 2105, |
| "train_speed(iter/s)": 0.025351 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 342.2, |
| "epoch": 0.8525252525252526, |
| "grad_norm": 2.2465362796243915, |
| "kl": 0.031561279296875, |
| "learning_rate": 2e-07, |
| "loss": -0.006004461646080017, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000111758709, |
| "reward_std": 0.386061829328537, |
| "rewards/MultiModalAccuracyORM": 0.4500000111758709, |
| "step": 2110, |
| "train_speed(iter/s)": 0.025381 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 430.45, |
| "epoch": 0.8545454545454545, |
| "grad_norm": 0.034882262330713364, |
| "kl": 0.01632537841796875, |
| "learning_rate": 2e-07, |
| "loss": 0.07573002576828003, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333358168602, |
| "reward_std": 0.3058815211057663, |
| "rewards/MultiModalAccuracyORM": 0.2083333358168602, |
| "step": 2115, |
| "train_speed(iter/s)": 0.025404 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 278.3, |
| "epoch": 0.8565656565656565, |
| "grad_norm": 1.8179385747560524, |
| "kl": 0.01519775390625, |
| "learning_rate": 2e-07, |
| "loss": 0.046589908003807065, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000596046447, |
| "reward_std": 0.28446818590164186, |
| "rewards/MultiModalAccuracyORM": 0.20000000596046447, |
| "step": 2120, |
| "train_speed(iter/s)": 0.025437 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 303.8, |
| "epoch": 0.8585858585858586, |
| "grad_norm": 1.842386637827148, |
| "kl": 0.023931884765625, |
| "learning_rate": 2e-07, |
| "loss": 0.0047568708658218386, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000141561031, |
| "reward_std": 0.32924269437789916, |
| "rewards/MultiModalAccuracyORM": 0.2500000141561031, |
| "step": 2125, |
| "train_speed(iter/s)": 0.025467 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 412.4, |
| "epoch": 0.8606060606060606, |
| "grad_norm": 3.12980971819249, |
| "kl": 0.0230224609375, |
| "learning_rate": 2e-07, |
| "loss": 0.012965646386146546, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.3833333387970924, |
| "reward_std": 0.3985911935567856, |
| "rewards/MultiModalAccuracyORM": 0.3833333387970924, |
| "step": 2130, |
| "train_speed(iter/s)": 0.025486 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.65, |
| "epoch": 0.8626262626262626, |
| "grad_norm": 0.9262722343921138, |
| "kl": 0.018023681640625, |
| "learning_rate": 2e-07, |
| "loss": 0.0012422390282154083, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000149011613, |
| "reward_std": 0.1808116167783737, |
| "rewards/MultiModalAccuracyORM": 0.17500000149011613, |
| "step": 2135, |
| "train_speed(iter/s)": 0.025513 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 335.7, |
| "epoch": 0.8646464646464647, |
| "grad_norm": 1.0357905764180717, |
| "kl": 0.01571044921875, |
| "learning_rate": 2e-07, |
| "loss": 0.0018387317657470703, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667014360426, |
| "reward_std": 0.25490583181381227, |
| "rewards/MultiModalAccuracyORM": 0.29166667014360426, |
| "step": 2140, |
| "train_speed(iter/s)": 0.025545 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.1, |
| "epoch": 0.8666666666666667, |
| "grad_norm": 2.379354282182724, |
| "kl": 0.019244384765625, |
| "learning_rate": 2e-07, |
| "loss": 0.028354501724243163, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000111758709, |
| "reward_std": 0.2963056802749634, |
| "rewards/MultiModalAccuracyORM": 0.3000000111758709, |
| "step": 2145, |
| "train_speed(iter/s)": 0.025579 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.1, |
| "epoch": 0.8686868686868687, |
| "grad_norm": 1.257926920186221, |
| "kl": 0.0236419677734375, |
| "learning_rate": 2e-07, |
| "loss": 0.05731485486030578, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000074505806, |
| "reward_std": 0.24860407412052155, |
| "rewards/MultiModalAccuracyORM": 0.4500000074505806, |
| "step": 2150, |
| "train_speed(iter/s)": 0.025607 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.65, |
| "epoch": 0.8707070707070707, |
| "grad_norm": 0.4145211028011141, |
| "kl": 0.035430908203125, |
| "learning_rate": 2e-07, |
| "loss": -0.008838014304637909, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.0416666679084301, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.0416666679084301, |
| "step": 2155, |
| "train_speed(iter/s)": 0.025637 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 481.6, |
| "epoch": 0.8727272727272727, |
| "grad_norm": 3.5679392309928852, |
| "kl": 0.020635986328125, |
| "learning_rate": 2e-07, |
| "loss": -0.04596620798110962, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.21666667237877846, |
| "reward_std": 0.3494287371635437, |
| "rewards/MultiModalAccuracyORM": 0.21666667237877846, |
| "step": 2160, |
| "train_speed(iter/s)": 0.02566 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 312.8, |
| "epoch": 0.8747474747474747, |
| "grad_norm": 2.915431806582569, |
| "kl": 0.03173828125, |
| "learning_rate": 2e-07, |
| "loss": 0.03424719870090485, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000000670552254, |
| "reward_std": 0.3579271614551544, |
| "rewards/MultiModalAccuracyORM": 0.45000000670552254, |
| "step": 2165, |
| "train_speed(iter/s)": 0.025692 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.9, |
| "epoch": 0.8767676767676768, |
| "grad_norm": 1.2438809288674397, |
| "kl": 0.02581787109375, |
| "learning_rate": 2e-07, |
| "loss": 0.022351789474487304, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000022351742, |
| "reward_std": 0.2556006669998169, |
| "rewards/MultiModalAccuracyORM": 0.2250000022351742, |
| "step": 2170, |
| "train_speed(iter/s)": 0.025718 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.35, |
| "epoch": 0.8787878787878788, |
| "grad_norm": 0.08213166464110444, |
| "kl": 0.0291015625, |
| "learning_rate": 2e-07, |
| "loss": -0.04905802011489868, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334028720856, |
| "reward_std": 0.3343147337436676, |
| "rewards/MultiModalAccuracyORM": 0.23333334028720856, |
| "step": 2175, |
| "train_speed(iter/s)": 0.025744 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.8, |
| "epoch": 0.8808080808080808, |
| "grad_norm": 1.2558474815848573, |
| "kl": 0.0392333984375, |
| "learning_rate": 2e-07, |
| "loss": 0.03639570772647858, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000052154064, |
| "reward_std": 0.40410049855709074, |
| "rewards/MultiModalAccuracyORM": 0.3500000052154064, |
| "step": 2180, |
| "train_speed(iter/s)": 0.025763 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 295.25, |
| "epoch": 0.8828282828282829, |
| "grad_norm": 2.2083604873690255, |
| "kl": 0.02174072265625, |
| "learning_rate": 2e-07, |
| "loss": 0.04861523509025574, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.4242177873849869, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 2185, |
| "train_speed(iter/s)": 0.025791 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 421.4, |
| "epoch": 0.8848484848484849, |
| "grad_norm": 1.9173115593509535, |
| "kl": 0.02357177734375, |
| "learning_rate": 2e-07, |
| "loss": 0.013380092382431031, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000074505806, |
| "reward_std": 0.311967608332634, |
| "rewards/MultiModalAccuracyORM": 0.3000000074505806, |
| "step": 2190, |
| "train_speed(iter/s)": 0.025813 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.5, |
| "epoch": 0.8868686868686869, |
| "grad_norm": 1.3588226440942046, |
| "kl": 0.025439453125, |
| "learning_rate": 2e-07, |
| "loss": 0.011188817024230958, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.18326250910758973, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 2195, |
| "train_speed(iter/s)": 0.025832 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.1, |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.8037621160022852, |
| "kl": 0.034747314453125, |
| "learning_rate": 2e-07, |
| "loss": 0.04917380511760712, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333879709245, |
| "reward_std": 0.3719944924116135, |
| "rewards/MultiModalAccuracyORM": 0.23333333879709245, |
| "step": 2200, |
| "train_speed(iter/s)": 0.025862 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 482.15, |
| "epoch": 0.8909090909090909, |
| "grad_norm": 2.141711868124079, |
| "kl": 0.0226776123046875, |
| "learning_rate": 2e-07, |
| "loss": -0.018071025609970093, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666666939854622, |
| "reward_std": 0.23030244410037995, |
| "rewards/MultiModalAccuracyORM": 0.11666666939854622, |
| "step": 2205, |
| "train_speed(iter/s)": 0.025882 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 501.4, |
| "epoch": 0.8929292929292929, |
| "grad_norm": 1.4394465065225663, |
| "kl": 0.03128662109375, |
| "learning_rate": 2e-07, |
| "loss": 0.019231194257736207, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333380520344, |
| "reward_std": 0.31740519404411316, |
| "rewards/MultiModalAccuracyORM": 0.1833333380520344, |
| "step": 2210, |
| "train_speed(iter/s)": 0.025901 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 268.9, |
| "epoch": 0.8949494949494949, |
| "grad_norm": 1.8778711843519251, |
| "kl": 0.03623046875, |
| "learning_rate": 2e-07, |
| "loss": 0.042392924427986145, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000104308128, |
| "reward_std": 0.24866368174552916, |
| "rewards/MultiModalAccuracyORM": 0.3000000104308128, |
| "step": 2215, |
| "train_speed(iter/s)": 0.025928 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.6, |
| "epoch": 0.896969696969697, |
| "grad_norm": 2.783501622971831, |
| "kl": 0.02158203125, |
| "learning_rate": 2e-07, |
| "loss": -0.009627214074134827, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000074505806, |
| "reward_std": 0.30665292739868166, |
| "rewards/MultiModalAccuracyORM": 0.3250000074505806, |
| "step": 2220, |
| "train_speed(iter/s)": 0.025959 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 304.35, |
| "epoch": 0.898989898989899, |
| "grad_norm": 64.84162647185127, |
| "kl": 0.042742919921875, |
| "learning_rate": 2e-07, |
| "loss": 0.027672123908996583, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1250000014901161, |
| "reward_std": 0.24265173375606536, |
| "rewards/MultiModalAccuracyORM": 0.1250000014901161, |
| "step": 2225, |
| "train_speed(iter/s)": 0.025989 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.25, |
| "epoch": 0.901010101010101, |
| "grad_norm": 2.756817795935333, |
| "kl": 0.027203369140625, |
| "learning_rate": 2e-07, |
| "loss": -0.0488799124956131, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.2922547996044159, |
| "rewards/MultiModalAccuracyORM": 0.20000000149011612, |
| "step": 2230, |
| "train_speed(iter/s)": 0.026019 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 331.1, |
| "epoch": 0.9030303030303031, |
| "grad_norm": 3.484265646880912, |
| "kl": 0.0185455322265625, |
| "learning_rate": 2e-07, |
| "loss": -0.006375116109848022, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333447575569, |
| "reward_std": 0.2692514002323151, |
| "rewards/MultiModalAccuracyORM": 0.4083333447575569, |
| "step": 2235, |
| "train_speed(iter/s)": 0.026045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.6, |
| "epoch": 0.9050505050505051, |
| "grad_norm": 0.08112989718996635, |
| "kl": 0.026385498046875, |
| "learning_rate": 2e-07, |
| "loss": 0.07493855953216552, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000670552253, |
| "reward_std": 0.2915985196828842, |
| "rewards/MultiModalAccuracyORM": 0.22500000670552253, |
| "step": 2240, |
| "train_speed(iter/s)": 0.026071 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.5, |
| "epoch": 0.907070707070707, |
| "grad_norm": 2.1571772688182276, |
| "kl": 0.02109375, |
| "learning_rate": 2e-07, |
| "loss": -0.008470755815505982, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.1808116227388382, |
| "rewards/MultiModalAccuracyORM": 0.15833333656191825, |
| "step": 2245, |
| "train_speed(iter/s)": 0.026093 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 2.4521268907747036, |
| "learning_rate": 2e-07, |
| "loss": 0.02900133728981018, |
| "memory(GiB)": 87.45, |
| "step": 2250, |
| "train_speed(iter/s)": 0.026122 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 326.39667755126953, |
| "eval_kl": 0.0267205810546875, |
| "eval_loss": 0.02248476631939411, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.3383333416283131, |
| "eval_reward_std": 0.30222029507160186, |
| "eval_rewards/MultiModalAccuracyORM": 0.3383333416283131, |
| "eval_runtime": 479.1069, |
| "eval_samples_per_second": 0.104, |
| "eval_steps_per_second": 0.01, |
| "step": 2250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 293.825, |
| "epoch": 0.9111111111111111, |
| "grad_norm": 2.997368813220566, |
| "kl": 0.02721710205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.003950953483581543, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4541666753590107, |
| "reward_std": 0.3525440260767937, |
| "rewards/MultiModalAccuracyORM": 0.4541666753590107, |
| "step": 2255, |
| "train_speed(iter/s)": 0.025886 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 221.3, |
| "epoch": 0.9131313131313131, |
| "grad_norm": 3.095107484502175, |
| "kl": 0.0549560546875, |
| "learning_rate": 2e-07, |
| "loss": 0.006377041339874268, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4750000052154064, |
| "reward_std": 0.30114119648933413, |
| "rewards/MultiModalAccuracyORM": 0.4750000052154064, |
| "step": 2260, |
| "train_speed(iter/s)": 0.025918 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 326.8, |
| "epoch": 0.9151515151515152, |
| "grad_norm": 2.764452940040707, |
| "kl": 0.025128173828125, |
| "learning_rate": 2e-07, |
| "loss": -0.060949933528900144, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.3563301384449005, |
| "rewards/MultiModalAccuracyORM": 0.2833333402872086, |
| "step": 2265, |
| "train_speed(iter/s)": 0.025947 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.85, |
| "epoch": 0.9171717171717172, |
| "grad_norm": 1.6613189303519411, |
| "kl": 0.0338897705078125, |
| "learning_rate": 2e-07, |
| "loss": 0.030397918820381165, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334177732468, |
| "reward_std": 0.21600489914417267, |
| "rewards/MultiModalAccuracyORM": 0.23333334177732468, |
| "step": 2270, |
| "train_speed(iter/s)": 0.025974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 268.75, |
| "epoch": 0.9191919191919192, |
| "grad_norm": 2.4104355223612903, |
| "kl": 0.043646240234375, |
| "learning_rate": 2e-07, |
| "loss": 0.02471620440483093, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667088866236, |
| "reward_std": 0.22880061268806456, |
| "rewards/MultiModalAccuracyORM": 0.39166667088866236, |
| "step": 2275, |
| "train_speed(iter/s)": 0.026005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 223.65, |
| "epoch": 0.9212121212121213, |
| "grad_norm": 0.9890862252945101, |
| "kl": 0.0232696533203125, |
| "learning_rate": 2e-07, |
| "loss": -0.019132834672927857, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.43333334773778914, |
| "reward_std": 0.28934226334095003, |
| "rewards/MultiModalAccuracyORM": 0.43333334773778914, |
| "step": 2280, |
| "train_speed(iter/s)": 0.026037 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.95, |
| "epoch": 0.9232323232323232, |
| "grad_norm": 2.8529813646862565, |
| "kl": 0.016925048828125, |
| "learning_rate": 2e-07, |
| "loss": 0.02090049088001251, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334401249887, |
| "reward_std": 0.25286819934844973, |
| "rewards/MultiModalAccuracyORM": 0.33333334401249887, |
| "step": 2285, |
| "train_speed(iter/s)": 0.026068 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.05, |
| "epoch": 0.9252525252525252, |
| "grad_norm": 1.89117356154723, |
| "kl": 0.0194671630859375, |
| "learning_rate": 2e-07, |
| "loss": 0.006132407486438752, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.33449481427669525, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 2290, |
| "train_speed(iter/s)": 0.026093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.9, |
| "epoch": 0.9272727272727272, |
| "grad_norm": 1.5821722224404322, |
| "kl": 0.0285491943359375, |
| "learning_rate": 2e-07, |
| "loss": -0.055334615707397464, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000089406967, |
| "reward_std": 0.3450992465019226, |
| "rewards/MultiModalAccuracyORM": 0.3250000089406967, |
| "step": 2295, |
| "train_speed(iter/s)": 0.026121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.45, |
| "epoch": 0.9292929292929293, |
| "grad_norm": 1.0631048809606616, |
| "kl": 0.0221282958984375, |
| "learning_rate": 2e-07, |
| "loss": 0.04601133763790131, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000298023224, |
| "reward_std": 0.3211964577436447, |
| "rewards/MultiModalAccuracyORM": 0.17500000298023224, |
| "step": 2300, |
| "train_speed(iter/s)": 0.026144 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.7, |
| "epoch": 0.9313131313131313, |
| "grad_norm": 2.2872062972102016, |
| "kl": 0.013800048828125, |
| "learning_rate": 2e-07, |
| "loss": -0.06729268431663513, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.308333345502615, |
| "reward_std": 0.42669269144535066, |
| "rewards/MultiModalAccuracyORM": 0.308333345502615, |
| "step": 2305, |
| "train_speed(iter/s)": 0.026168 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 301.05, |
| "epoch": 0.9333333333333333, |
| "grad_norm": 1.5571796305098269, |
| "kl": 0.015960693359375, |
| "learning_rate": 2e-07, |
| "loss": 0.019453226029872893, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000111758709, |
| "reward_std": 0.2812868684530258, |
| "rewards/MultiModalAccuracyORM": 0.2750000111758709, |
| "step": 2310, |
| "train_speed(iter/s)": 0.02619 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 354.5, |
| "epoch": 0.9353535353535354, |
| "grad_norm": 1.2789781364913986, |
| "kl": 0.0262939453125, |
| "learning_rate": 2e-07, |
| "loss": -0.014371034502983094, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667610406875, |
| "reward_std": 0.35789157152175904, |
| "rewards/MultiModalAccuracyORM": 0.39166667610406875, |
| "step": 2315, |
| "train_speed(iter/s)": 0.026212 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 315.75, |
| "epoch": 0.9373737373737374, |
| "grad_norm": 2.0043648431803742, |
| "kl": 0.0160247802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.004941976815462113, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666753590107, |
| "reward_std": 0.3945602476596832, |
| "rewards/MultiModalAccuracyORM": 0.2666666753590107, |
| "step": 2320, |
| "train_speed(iter/s)": 0.026239 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 397.45, |
| "epoch": 0.9393939393939394, |
| "grad_norm": 2.434275159571036, |
| "kl": 0.0218505859375, |
| "learning_rate": 2e-07, |
| "loss": 0.015783283114433288, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000001192092896, |
| "reward_std": 0.44407508671283724, |
| "rewards/MultiModalAccuracyORM": 0.30000001192092896, |
| "step": 2325, |
| "train_speed(iter/s)": 0.026266 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 341.45, |
| "epoch": 0.9414141414141414, |
| "grad_norm": 3.3518880188766262, |
| "kl": 0.0180023193359375, |
| "learning_rate": 2e-07, |
| "loss": 0.004853534698486328, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500001266598704, |
| "reward_std": 0.3925822228193283, |
| "rewards/MultiModalAccuracyORM": 0.37500001266598704, |
| "step": 2330, |
| "train_speed(iter/s)": 0.026293 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 434.3, |
| "epoch": 0.9434343434343434, |
| "grad_norm": 2.162505598086888, |
| "kl": 0.018927001953125, |
| "learning_rate": 2e-07, |
| "loss": 0.06589244604110718, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.05, |
| "reward": 0.3666666693985462, |
| "reward_std": 0.2581467509269714, |
| "rewards/MultiModalAccuracyORM": 0.3666666693985462, |
| "step": 2335, |
| "train_speed(iter/s)": 0.026311 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.1, |
| "epoch": 0.9454545454545454, |
| "grad_norm": 2.6990455984773494, |
| "kl": 0.0258880615234375, |
| "learning_rate": 2e-07, |
| "loss": 0.007903063297271728, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333656191825, |
| "reward_std": 0.3127004593610764, |
| "rewards/MultiModalAccuracyORM": 0.15833333656191825, |
| "step": 2340, |
| "train_speed(iter/s)": 0.026336 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 189.5, |
| "epoch": 0.9474747474747475, |
| "grad_norm": 31.778104916563368, |
| "kl": 0.046075439453125, |
| "learning_rate": 2e-07, |
| "loss": -0.046237149834632875, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334624767303, |
| "reward_std": 0.3485885769128799, |
| "rewards/MultiModalAccuracyORM": 0.28333334624767303, |
| "step": 2345, |
| "train_speed(iter/s)": 0.026363 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.05, |
| "epoch": 0.9494949494949495, |
| "grad_norm": 1.8887972983852979, |
| "kl": 0.0284271240234375, |
| "learning_rate": 2e-07, |
| "loss": -0.044114714860916136, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667610406877, |
| "reward_std": 0.3408351272344589, |
| "rewards/MultiModalAccuracyORM": 0.29166667610406877, |
| "step": 2350, |
| "train_speed(iter/s)": 0.026385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.95, |
| "epoch": 0.9515151515151515, |
| "grad_norm": 2.719100446764501, |
| "kl": 0.0343994140625, |
| "learning_rate": 2e-07, |
| "loss": 0.030634421110153198, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334028720856, |
| "reward_std": 0.379781112074852, |
| "rewards/MultiModalAccuracyORM": 0.38333334028720856, |
| "step": 2355, |
| "train_speed(iter/s)": 0.026406 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.7, |
| "epoch": 0.9535353535353536, |
| "grad_norm": 2.4658627482626816, |
| "kl": 0.033807373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.026800933480262756, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4916666731238365, |
| "reward_std": 0.2393606811761856, |
| "rewards/MultiModalAccuracyORM": 0.4916666731238365, |
| "step": 2360, |
| "train_speed(iter/s)": 0.026436 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 360.25, |
| "epoch": 0.9555555555555556, |
| "grad_norm": 2.851734873550529, |
| "kl": 0.027685546875, |
| "learning_rate": 2e-07, |
| "loss": 0.013045597076416015, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000052154064, |
| "reward_std": 0.43759028911590575, |
| "rewards/MultiModalAccuracyORM": 0.3500000052154064, |
| "step": 2365, |
| "train_speed(iter/s)": 0.026463 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 315.05, |
| "epoch": 0.9575757575757575, |
| "grad_norm": 1.448742319519302, |
| "kl": 0.02066650390625, |
| "learning_rate": 2e-07, |
| "loss": -0.010880425572395325, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667237877847, |
| "reward_std": 0.3780420243740082, |
| "rewards/MultiModalAccuracyORM": 0.41666667237877847, |
| "step": 2370, |
| "train_speed(iter/s)": 0.026491 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 284.3, |
| "epoch": 0.9595959595959596, |
| "grad_norm": 1.7573565404253169, |
| "kl": 0.05279541015625, |
| "learning_rate": 2e-07, |
| "loss": -0.009101217985153199, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666805744171, |
| "reward_std": 0.32049004435539247, |
| "rewards/MultiModalAccuracyORM": 0.3916666805744171, |
| "step": 2375, |
| "train_speed(iter/s)": 0.026519 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.75, |
| "epoch": 0.9616161616161616, |
| "grad_norm": 1.3965100041612641, |
| "kl": 0.02640380859375, |
| "learning_rate": 2e-07, |
| "loss": 0.01602880358695984, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667759418485, |
| "reward_std": 0.3471368789672852, |
| "rewards/MultiModalAccuracyORM": 0.41666667759418485, |
| "step": 2380, |
| "train_speed(iter/s)": 0.026544 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.25, |
| "epoch": 0.9636363636363636, |
| "grad_norm": 2.2883768350459732, |
| "kl": 0.0283721923828125, |
| "learning_rate": 2e-07, |
| "loss": -0.02478056252002716, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333730697632, |
| "reward_std": 0.3252063632011414, |
| "rewards/MultiModalAccuracyORM": 0.18333333730697632, |
| "step": 2385, |
| "train_speed(iter/s)": 0.02657 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 269.4, |
| "epoch": 0.9656565656565657, |
| "grad_norm": 2.3698939133503027, |
| "kl": 0.027130126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.0352479875087738, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.316666679084301, |
| "reward_std": 0.2815410941839218, |
| "rewards/MultiModalAccuracyORM": 0.316666679084301, |
| "step": 2390, |
| "train_speed(iter/s)": 0.0266 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.3, |
| "epoch": 0.9676767676767677, |
| "grad_norm": 2.6455515972771577, |
| "kl": 0.0282379150390625, |
| "learning_rate": 2e-07, |
| "loss": 0.02145477384328842, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666738688946, |
| "reward_std": 0.43726191222667693, |
| "rewards/MultiModalAccuracyORM": 0.3416666738688946, |
| "step": 2395, |
| "train_speed(iter/s)": 0.026624 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.1, |
| "epoch": 0.9696969696969697, |
| "grad_norm": 1.3800009626988052, |
| "kl": 0.023291015625, |
| "learning_rate": 2e-07, |
| "loss": 0.009223046898841857, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.09166666939854622, |
| "reward_std": 0.1850757420063019, |
| "rewards/MultiModalAccuracyORM": 0.09166666939854622, |
| "step": 2400, |
| "train_speed(iter/s)": 0.02665 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 264.95, |
| "epoch": 0.9717171717171718, |
| "grad_norm": 2.707313244667536, |
| "kl": 0.0386138916015625, |
| "learning_rate": 2e-07, |
| "loss": -0.016336160898208617, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333333730697633, |
| "reward_std": 0.24860407412052155, |
| "rewards/MultiModalAccuracyORM": 0.38333333730697633, |
| "step": 2405, |
| "train_speed(iter/s)": 0.026681 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.65, |
| "epoch": 0.9737373737373738, |
| "grad_norm": 2.6298064760318223, |
| "kl": 0.031060791015625, |
| "learning_rate": 2e-07, |
| "loss": -0.026252752542495726, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.3385047078132629, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 2410, |
| "train_speed(iter/s)": 0.0267 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 270.65, |
| "epoch": 0.9757575757575757, |
| "grad_norm": 2.0364458058384423, |
| "kl": 0.018072509765625, |
| "learning_rate": 2e-07, |
| "loss": -0.022683143615722656, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.483333345502615, |
| "reward_std": 0.2900991141796112, |
| "rewards/MultiModalAccuracyORM": 0.483333345502615, |
| "step": 2415, |
| "train_speed(iter/s)": 0.026729 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.65, |
| "epoch": 0.9777777777777777, |
| "grad_norm": 3.0539530097221843, |
| "kl": 0.0236175537109375, |
| "learning_rate": 2e-07, |
| "loss": -0.025200226902961732, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5166666835546494, |
| "reward_std": 0.3579155892133713, |
| "rewards/MultiModalAccuracyORM": 0.5166666835546494, |
| "step": 2420, |
| "train_speed(iter/s)": 0.026757 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 319.65, |
| "epoch": 0.9797979797979798, |
| "grad_norm": 2.837404902371068, |
| "kl": 0.0191802978515625, |
| "learning_rate": 2e-07, |
| "loss": -0.05283277034759522, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666679084301, |
| "reward_std": 0.24939410090446473, |
| "rewards/MultiModalAccuracyORM": 0.1916666679084301, |
| "step": 2425, |
| "train_speed(iter/s)": 0.026783 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.05, |
| "epoch": 0.9818181818181818, |
| "grad_norm": 1.2637214917941955, |
| "kl": 0.0302001953125, |
| "learning_rate": 2e-07, |
| "loss": 0.013781133294105529, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500001266598704, |
| "reward_std": 0.4204265236854553, |
| "rewards/MultiModalAccuracyORM": 0.37500001266598704, |
| "step": 2430, |
| "train_speed(iter/s)": 0.026802 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.8, |
| "epoch": 0.9838383838383838, |
| "grad_norm": 0.058208298350106734, |
| "kl": 0.0239227294921875, |
| "learning_rate": 2e-07, |
| "loss": 0.03573224246501923, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000670552254, |
| "reward_std": 0.26597192585468293, |
| "rewards/MultiModalAccuracyORM": 0.32500000670552254, |
| "step": 2435, |
| "train_speed(iter/s)": 0.02683 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.85, |
| "epoch": 0.9858585858585859, |
| "grad_norm": 1.6302602474729853, |
| "kl": 0.0171844482421875, |
| "learning_rate": 2e-07, |
| "loss": -0.012005738914012909, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666679084301, |
| "reward_std": 0.19717081785202026, |
| "rewards/MultiModalAccuracyORM": 0.1916666679084301, |
| "step": 2440, |
| "train_speed(iter/s)": 0.026853 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.0, |
| "epoch": 0.9878787878787879, |
| "grad_norm": 2.5433362450025765, |
| "kl": 0.0248321533203125, |
| "learning_rate": 2e-07, |
| "loss": -0.030718517303466798, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333879709245, |
| "reward_std": 0.30894235968589784, |
| "rewards/MultiModalAccuracyORM": 0.35833333879709245, |
| "step": 2445, |
| "train_speed(iter/s)": 0.026875 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.35, |
| "epoch": 0.98989898989899, |
| "grad_norm": 1.0906797325242925, |
| "kl": 0.024761962890625, |
| "learning_rate": 2e-07, |
| "loss": -0.007297384738922119, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334028720856, |
| "reward_std": 0.3393357157707214, |
| "rewards/MultiModalAccuracyORM": 0.23333334028720856, |
| "step": 2450, |
| "train_speed(iter/s)": 0.026893 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 355.7, |
| "epoch": 0.9919191919191919, |
| "grad_norm": 1.8168984918524227, |
| "kl": 0.0161956787109375, |
| "learning_rate": 2e-07, |
| "loss": 0.03163195252418518, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.450000011920929, |
| "reward_std": 0.37525942325592043, |
| "rewards/MultiModalAccuracyORM": 0.450000011920929, |
| "step": 2455, |
| "train_speed(iter/s)": 0.026915 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.05, |
| "epoch": 0.9939393939393939, |
| "grad_norm": 1.171315154121709, |
| "kl": 0.02081298828125, |
| "learning_rate": 2e-07, |
| "loss": 0.014726841449737548, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333402872086, |
| "reward_std": 0.29634127020835876, |
| "rewards/MultiModalAccuracyORM": 0.4083333402872086, |
| "step": 2460, |
| "train_speed(iter/s)": 0.026936 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 302.6, |
| "epoch": 0.9959595959595959, |
| "grad_norm": 0.9872275853532635, |
| "kl": 0.01080322265625, |
| "learning_rate": 2e-07, |
| "loss": 0.01651265621185303, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5666666753590107, |
| "reward_std": 0.2488823115825653, |
| "rewards/MultiModalAccuracyORM": 0.5666666753590107, |
| "step": 2465, |
| "train_speed(iter/s)": 0.026959 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 438.75, |
| "epoch": 0.997979797979798, |
| "grad_norm": 1.8423007639906985, |
| "kl": 0.0185638427734375, |
| "learning_rate": 2e-07, |
| "loss": -0.006967762112617492, |
| "memory(GiB)": 87.45, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667237877844, |
| "reward_std": 0.21999078691005708, |
| "rewards/MultiModalAccuracyORM": 0.31666667237877844, |
| "step": 2470, |
| "train_speed(iter/s)": 0.02698 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.4251028884123285, |
| "learning_rate": 2e-07, |
| "loss": -0.04546417593955994, |
| "memory(GiB)": 87.45, |
| "step": 2475, |
| "train_speed(iter/s)": 0.026999 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 364.18834014892576, |
| "eval_kl": 0.0238104248046875, |
| "eval_loss": 0.01933932490646839, |
| "eval_response_clip_ratio": 0.00833333358168602, |
| "eval_reward": 0.34333334282040595, |
| "eval_reward_std": 0.295663959980011, |
| "eval_rewards/MultiModalAccuracyORM": 0.34333334282040595, |
| "eval_runtime": 580.8644, |
| "eval_samples_per_second": 0.086, |
| "eval_steps_per_second": 0.009, |
| "step": 2475 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2475, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|