| { |
| "best_metric": 0.4650000059604645, |
| "best_model_checkpoint": "/mnt/data/user/zhao_jun/tangjixin/output/model/llava_ov-grpo_new_v20_5k/v8-20250330-101445/checkpoint-2475", |
| "epoch": 1.0, |
| "eval_steps": 250, |
| "global_step": 2475, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.833333730697632, |
| "epoch": 0.00040404040404040404, |
| "grad_norm": 4.95974063873291, |
| "kl": 0.0007257461547851562, |
| "learning_rate": 1.6129032258064515e-09, |
| "loss": 0.1313462257385254, |
| "memory(GiB)": 103.91, |
| "response_clip_ratio": 0.0, |
| "reward": 0.125, |
| "reward_std": 0.22613351047039032, |
| "rewards/MultiModalAccuracyORM": 0.125, |
| "step": 1, |
| "train_speed(iter/s)": 0.011139 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 45.927083522081375, |
| "epoch": 0.00202020202020202, |
| "grad_norm": 0.026089413091540337, |
| "kl": 0.00024419277906417847, |
| "learning_rate": 8.064516129032257e-09, |
| "loss": -0.0017255048733204603, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.0416666679084301, |
| "reward_std": 0.09731236100196838, |
| "rewards/MultiModalAccuracyORM": 0.0416666679084301, |
| "step": 5, |
| "train_speed(iter/s)": 0.028079 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.066667795181274, |
| "epoch": 0.00404040404040404, |
| "grad_norm": 4.474486827850342, |
| "kl": 4.897117614746094e-05, |
| "learning_rate": 1.6129032258064514e-08, |
| "loss": 0.005788012593984604, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1250000014901161, |
| "reward_std": 0.2712650209665298, |
| "rewards/MultiModalAccuracyORM": 0.1250000014901161, |
| "step": 10, |
| "train_speed(iter/s)": 0.034795 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 39.85000114440918, |
| "epoch": 0.006060606060606061, |
| "grad_norm": 2.904900074005127, |
| "kl": 0.00015695095062255858, |
| "learning_rate": 2.4193548387096773e-08, |
| "loss": 0.036757296323776244, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666716337204, |
| "reward_std": 0.29389037787914274, |
| "rewards/MultiModalAccuracyORM": 0.2416666716337204, |
| "step": 15, |
| "train_speed(iter/s)": 0.0376 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 69.10000429153442, |
| "epoch": 0.00808080808080808, |
| "grad_norm": 1.9090512990951538, |
| "kl": 0.00022979974746704102, |
| "learning_rate": 3.225806451612903e-08, |
| "loss": 0.00942036360502243, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666667088866233, |
| "reward_std": 0.31846399009227755, |
| "rewards/MultiModalAccuracyORM": 0.16666667088866233, |
| "step": 20, |
| "train_speed(iter/s)": 0.03857 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.64166686534882, |
| "epoch": 0.010101010101010102, |
| "grad_norm": 23.398836135864258, |
| "kl": 0.00027928352355957033, |
| "learning_rate": 4.032258064516129e-08, |
| "loss": -0.005109664052724838, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333410322666, |
| "reward_std": 0.31046820282936094, |
| "rewards/MultiModalAccuracyORM": 0.2083333410322666, |
| "step": 25, |
| "train_speed(iter/s)": 0.039527 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 27.183334088325502, |
| "epoch": 0.012121212121212121, |
| "grad_norm": 0.027309712022542953, |
| "kl": 0.0002372264862060547, |
| "learning_rate": 4.8387096774193546e-08, |
| "loss": -0.016541659832000732, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.05833333432674408, |
| "reward_std": 0.14188667237758637, |
| "rewards/MultiModalAccuracyORM": 0.05833333432674408, |
| "step": 30, |
| "train_speed(iter/s)": 0.040173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 48.750002241134645, |
| "epoch": 0.014141414141414142, |
| "grad_norm": 2.6486644744873047, |
| "kl": 0.00022208690643310547, |
| "learning_rate": 5.645161290322581e-08, |
| "loss": 0.03488517701625824, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1416666716337204, |
| "reward_std": 0.19962169826030732, |
| "rewards/MultiModalAccuracyORM": 0.1416666716337204, |
| "step": 35, |
| "train_speed(iter/s)": 0.040888 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.7666668176651, |
| "epoch": 0.01616161616161616, |
| "grad_norm": 13.41940689086914, |
| "kl": 0.00021257400512695313, |
| "learning_rate": 6.451612903225806e-08, |
| "loss": -0.0012449542991816998, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000223517418, |
| "reward_std": 0.2652174890041351, |
| "rewards/MultiModalAccuracyORM": 0.12500000223517418, |
| "step": 40, |
| "train_speed(iter/s)": 0.041651 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 65.25000057220458, |
| "epoch": 0.01818181818181818, |
| "grad_norm": 11.40164852142334, |
| "kl": 5.4210424423217773e-05, |
| "learning_rate": 7.258064516129032e-08, |
| "loss": 0.03769001364707947, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667386889457, |
| "reward_std": 0.325963220000267, |
| "rewards/MultiModalAccuracyORM": 0.21666667386889457, |
| "step": 45, |
| "train_speed(iter/s)": 0.041539 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.84166791439056, |
| "epoch": 0.020202020202020204, |
| "grad_norm": 0.03606203943490982, |
| "kl": 0.00031108856201171874, |
| "learning_rate": 8.064516129032257e-08, |
| "loss": 1.2442469596862793e-05, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.07500000074505805, |
| "reward_std": 0.15824586153030396, |
| "rewards/MultiModalAccuracyORM": 0.07500000074505805, |
| "step": 50, |
| "train_speed(iter/s)": 0.041821 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 20.025000762939452, |
| "epoch": 0.022222222222222223, |
| "grad_norm": 3.2404561042785645, |
| "kl": 0.0004961967468261718, |
| "learning_rate": 8.870967741935484e-08, |
| "loss": 0.016841122508049013, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166666865348815, |
| "reward_std": 0.3241831511259079, |
| "rewards/MultiModalAccuracyORM": 0.24166666865348815, |
| "step": 55, |
| "train_speed(iter/s)": 0.042244 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.516666769981384, |
| "epoch": 0.024242424242424242, |
| "grad_norm": 3.8046255111694336, |
| "kl": 6.520748138427735e-06, |
| "learning_rate": 9.677419354838709e-08, |
| "loss": -0.001297527551651001, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666668206453323, |
| "reward_std": 0.330777695775032, |
| "rewards/MultiModalAccuracyORM": 0.36666668206453323, |
| "step": 60, |
| "train_speed(iter/s)": 0.042408 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.5333336353302, |
| "epoch": 0.026262626262626262, |
| "grad_norm": 0.015074208378791809, |
| "kl": 0.00015583038330078126, |
| "learning_rate": 1.0483870967741934e-07, |
| "loss": -0.018772208690643312, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.3019101768732071, |
| "rewards/MultiModalAccuracyORM": 0.2083333395421505, |
| "step": 65, |
| "train_speed(iter/s)": 0.04265 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.125000405311585, |
| "epoch": 0.028282828282828285, |
| "grad_norm": 1.4802911281585693, |
| "kl": 0.0001938343048095703, |
| "learning_rate": 1.1290322580645162e-07, |
| "loss": 0.04349477887153626, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333805203437, |
| "reward_std": 0.26123160123825073, |
| "rewards/MultiModalAccuracyORM": 0.15833333805203437, |
| "step": 70, |
| "train_speed(iter/s)": 0.042774 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.00833351612091, |
| "epoch": 0.030303030303030304, |
| "grad_norm": 17.15009880065918, |
| "kl": 0.0005457401275634766, |
| "learning_rate": 1.2096774193548387e-07, |
| "loss": -0.03085809648036957, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000298023224, |
| "reward_std": 0.2855865776538849, |
| "rewards/MultiModalAccuracyORM": 0.20000000298023224, |
| "step": 75, |
| "train_speed(iter/s)": 0.043032 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 30.941667556762695, |
| "epoch": 0.03232323232323232, |
| "grad_norm": 0.15290312469005585, |
| "kl": 0.0005632162094116211, |
| "learning_rate": 1.2903225806451611e-07, |
| "loss": -0.019948795437812805, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.00833333358168602, |
| "reward": 0.15000000447034836, |
| "reward_std": 0.2066778928041458, |
| "rewards/MultiModalAccuracyORM": 0.15000000447034836, |
| "step": 80, |
| "train_speed(iter/s)": 0.042552 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.350000309944154, |
| "epoch": 0.03434343434343434, |
| "grad_norm": 10.242753028869629, |
| "kl": 0.0002181917428970337, |
| "learning_rate": 1.3709677419354838e-07, |
| "loss": -0.0021827301010489465, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333805203438, |
| "reward_std": 0.36318225264549253, |
| "rewards/MultiModalAccuracyORM": 0.20833333805203438, |
| "step": 85, |
| "train_speed(iter/s)": 0.042776 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.70833353996277, |
| "epoch": 0.03636363636363636, |
| "grad_norm": 18.3216552734375, |
| "kl": 0.00013442039489746093, |
| "learning_rate": 1.4516129032258064e-07, |
| "loss": -0.014865413308143616, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000521540643, |
| "reward_std": 0.19786564111709595, |
| "rewards/MultiModalAccuracyORM": 0.17500000521540643, |
| "step": 90, |
| "train_speed(iter/s)": 0.042668 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.166666793823243, |
| "epoch": 0.03838383838383838, |
| "grad_norm": 2.986149311065674, |
| "kl": 0.00017652511596679687, |
| "learning_rate": 1.5322580645161288e-07, |
| "loss": -0.004295501857995987, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.0833333358168602, |
| "reward_std": 0.18482151627540588, |
| "rewards/MultiModalAccuracyORM": 0.0833333358168602, |
| "step": 95, |
| "train_speed(iter/s)": 0.042663 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 37.32500224113464, |
| "epoch": 0.04040404040404041, |
| "grad_norm": 9.087557792663574, |
| "kl": 0.00025534629821777344, |
| "learning_rate": 1.6129032258064515e-07, |
| "loss": -0.042690178751945494, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.3192540168762207, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 100, |
| "train_speed(iter/s)": 0.042723 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 42.64166672229767, |
| "epoch": 0.04242424242424243, |
| "grad_norm": 1.299012303352356, |
| "kl": 0.000713956356048584, |
| "learning_rate": 1.6935483870967741e-07, |
| "loss": -0.01074601411819458, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000014901161, |
| "reward_std": 0.2782616138458252, |
| "rewards/MultiModalAccuracyORM": 0.1500000014901161, |
| "step": 105, |
| "train_speed(iter/s)": 0.042694 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.308334159851075, |
| "epoch": 0.044444444444444446, |
| "grad_norm": 20.200790405273438, |
| "kl": -2.079010009765625e-05, |
| "learning_rate": 1.7741935483870968e-07, |
| "loss": -0.0049890361726284025, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000447034836, |
| "reward_std": 0.34557787179946897, |
| "rewards/MultiModalAccuracyORM": 0.17500000447034836, |
| "step": 110, |
| "train_speed(iter/s)": 0.042795 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.325000619888307, |
| "epoch": 0.046464646464646465, |
| "grad_norm": 2.473445177078247, |
| "kl": 0.0003504753112792969, |
| "learning_rate": 1.8548387096774192e-07, |
| "loss": 0.009455542266368865, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333805203437, |
| "reward_std": 0.2629852324724197, |
| "rewards/MultiModalAccuracyORM": 0.15833333805203437, |
| "step": 115, |
| "train_speed(iter/s)": 0.042806 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.07500042915344, |
| "epoch": 0.048484848484848485, |
| "grad_norm": 18.782503128051758, |
| "kl": 0.00040736198425292967, |
| "learning_rate": 1.9354838709677418e-07, |
| "loss": -0.00938464030623436, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.17861495018005372, |
| "rewards/MultiModalAccuracyORM": 0.10000000149011612, |
| "step": 120, |
| "train_speed(iter/s)": 0.042915 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 40.666668796539305, |
| "epoch": 0.050505050505050504, |
| "grad_norm": 10.809483528137207, |
| "kl": 0.00013909339904785156, |
| "learning_rate": 2e-07, |
| "loss": 0.015682700276374816, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333417773247, |
| "reward_std": 0.2325587123632431, |
| "rewards/MultiModalAccuracyORM": 0.3083333417773247, |
| "step": 125, |
| "train_speed(iter/s)": 0.042982 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.79166784286499, |
| "epoch": 0.052525252525252523, |
| "grad_norm": 0.059968430548906326, |
| "kl": 0.0003565549850463867, |
| "learning_rate": 2e-07, |
| "loss": -0.012978824973106384, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833334028720857, |
| "reward_std": 0.2775311887264252, |
| "rewards/MultiModalAccuracyORM": 0.20833334028720857, |
| "step": 130, |
| "train_speed(iter/s)": 0.043138 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.358333635330201, |
| "epoch": 0.05454545454545454, |
| "grad_norm": 16.368749618530273, |
| "kl": 0.0005423665046691894, |
| "learning_rate": 2e-07, |
| "loss": -0.018562111258506774, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333432674408, |
| "reward_std": 0.3227223068475723, |
| "rewards/MultiModalAccuracyORM": 0.2083333432674408, |
| "step": 135, |
| "train_speed(iter/s)": 0.043281 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 46.30833601951599, |
| "epoch": 0.05656565656565657, |
| "grad_norm": 8.052789688110352, |
| "kl": 0.0008988380432128906, |
| "learning_rate": 2e-07, |
| "loss": 0.05945103764533997, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333879709243, |
| "reward_std": 0.32900004684925077, |
| "rewards/MultiModalAccuracyORM": 0.20833333879709243, |
| "step": 140, |
| "train_speed(iter/s)": 0.043225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 4.983333492279053, |
| "epoch": 0.05858585858585859, |
| "grad_norm": 5.5169525146484375, |
| "kl": 0.0008536338806152344, |
| "learning_rate": 2e-07, |
| "loss": -0.03663218915462494, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2000000074505806, |
| "reward_std": 0.29079394936561587, |
| "rewards/MultiModalAccuracyORM": 0.2000000074505806, |
| "step": 145, |
| "train_speed(iter/s)": 0.043361 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.125000166893005, |
| "epoch": 0.06060606060606061, |
| "grad_norm": 0.07958526909351349, |
| "kl": 0.001511383056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.05411055088043213, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2333333373069763, |
| "reward_std": 0.27122943103313446, |
| "rewards/MultiModalAccuracyORM": 0.2333333373069763, |
| "step": 150, |
| "train_speed(iter/s)": 0.043443 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.666666889190674, |
| "epoch": 0.06262626262626263, |
| "grad_norm": 0.0961478129029274, |
| "kl": 0.0021147727966308594, |
| "learning_rate": 2e-07, |
| "loss": 0.0017779668793082236, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000447034835, |
| "reward_std": 0.22052658796310426, |
| "rewards/MultiModalAccuracyORM": 0.20000000447034835, |
| "step": 155, |
| "train_speed(iter/s)": 0.043434 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 56.125001430511475, |
| "epoch": 0.06464646464646465, |
| "grad_norm": 3.5018489360809326, |
| "kl": 0.0011393070220947266, |
| "learning_rate": 2e-07, |
| "loss": 0.003215038776397705, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333395421505, |
| "reward_std": 0.2687189429998398, |
| "rewards/MultiModalAccuracyORM": 0.1833333395421505, |
| "step": 160, |
| "train_speed(iter/s)": 0.043475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.183334255218504, |
| "epoch": 0.06666666666666667, |
| "grad_norm": 1.7839807271957397, |
| "kl": 0.001880502700805664, |
| "learning_rate": 2e-07, |
| "loss": 0.037510618567466736, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1250000037252903, |
| "reward_std": 0.2629256367683411, |
| "rewards/MultiModalAccuracyORM": 0.1250000037252903, |
| "step": 165, |
| "train_speed(iter/s)": 0.04338 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.583333587646484, |
| "epoch": 0.06868686868686869, |
| "grad_norm": 2.9806480407714844, |
| "kl": 0.001198887825012207, |
| "learning_rate": 2e-07, |
| "loss": 0.007929786294698715, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667461395265, |
| "reward_std": 0.21750431060791015, |
| "rewards/MultiModalAccuracyORM": 0.19166667461395265, |
| "step": 170, |
| "train_speed(iter/s)": 0.043348 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.308333587646484, |
| "epoch": 0.0707070707070707, |
| "grad_norm": 0.006374528165906668, |
| "kl": 0.008016198873519897, |
| "learning_rate": 2e-07, |
| "loss": 0.0161195233464241, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667014360428, |
| "reward_std": 0.2822715133428574, |
| "rewards/MultiModalAccuracyORM": 0.19166667014360428, |
| "step": 175, |
| "train_speed(iter/s)": 0.043522 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.283334064483643, |
| "epoch": 0.07272727272727272, |
| "grad_norm": 13.373006820678711, |
| "kl": 0.005344104766845703, |
| "learning_rate": 2e-07, |
| "loss": 0.005642924830317498, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1250000037252903, |
| "reward_std": 0.2629256367683411, |
| "rewards/MultiModalAccuracyORM": 0.1250000037252903, |
| "step": 180, |
| "train_speed(iter/s)": 0.043562 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.858333396911621, |
| "epoch": 0.07474747474747474, |
| "grad_norm": 20.940757751464844, |
| "kl": 0.004119682312011719, |
| "learning_rate": 2e-07, |
| "loss": -0.014204351603984833, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667312383653, |
| "reward_std": 0.24560283720493317, |
| "rewards/MultiModalAccuracyORM": 0.21666667312383653, |
| "step": 185, |
| "train_speed(iter/s)": 0.043604 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.808333730697631, |
| "epoch": 0.07676767676767676, |
| "grad_norm": 1.9175783395767212, |
| "kl": 0.0015784263610839843, |
| "learning_rate": 2e-07, |
| "loss": 0.036653178930282596, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.27774982452392577, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 190, |
| "train_speed(iter/s)": 0.043708 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.058333468437194, |
| "epoch": 0.07878787878787878, |
| "grad_norm": 20.731929779052734, |
| "kl": 0.002748870849609375, |
| "learning_rate": 2e-07, |
| "loss": -0.007462918758392334, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667461395264, |
| "reward_std": 0.26047474443912505, |
| "rewards/MultiModalAccuracyORM": 0.24166667461395264, |
| "step": 195, |
| "train_speed(iter/s)": 0.043797 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 39.766668224334715, |
| "epoch": 0.08080808080808081, |
| "grad_norm": 32.81786346435547, |
| "kl": 0.012819027900695801, |
| "learning_rate": 2e-07, |
| "loss": -0.012741921842098236, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666731238365, |
| "reward_std": 0.3634008765220642, |
| "rewards/MultiModalAccuracyORM": 0.1916666731238365, |
| "step": 200, |
| "train_speed(iter/s)": 0.043791 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.191666889190674, |
| "epoch": 0.08282828282828283, |
| "grad_norm": 10.631654739379883, |
| "kl": 0.007097434997558594, |
| "learning_rate": 2e-07, |
| "loss": -0.059709519147872925, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000223517418, |
| "reward_std": 0.26302082240581515, |
| "rewards/MultiModalAccuracyORM": 0.15000000223517418, |
| "step": 205, |
| "train_speed(iter/s)": 0.043862 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.100000143051147, |
| "epoch": 0.08484848484848485, |
| "grad_norm": 15.135857582092285, |
| "kl": 0.016997623443603515, |
| "learning_rate": 2e-07, |
| "loss": 0.036284705996513365, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333730697632, |
| "reward_std": 0.24481281042098998, |
| "rewards/MultiModalAccuracyORM": 0.10833333730697632, |
| "step": 210, |
| "train_speed(iter/s)": 0.043845 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.075000190734864, |
| "epoch": 0.08686868686868687, |
| "grad_norm": 15.046256065368652, |
| "kl": 0.013745307922363281, |
| "learning_rate": 2e-07, |
| "loss": -0.01842743158340454, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.3001325339078903, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 215, |
| "train_speed(iter/s)": 0.043911 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.94166750907898, |
| "epoch": 0.08888888888888889, |
| "grad_norm": 14.397719383239746, |
| "kl": 0.01525421142578125, |
| "learning_rate": 2e-07, |
| "loss": -0.016506943106651305, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000894069672, |
| "reward_std": 0.24662604331970214, |
| "rewards/MultiModalAccuracyORM": 0.22500000894069672, |
| "step": 220, |
| "train_speed(iter/s)": 0.043992 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.44166669845581, |
| "epoch": 0.09090909090909091, |
| "grad_norm": 12.164202690124512, |
| "kl": 0.025649261474609376, |
| "learning_rate": 2e-07, |
| "loss": 0.017044636607170104, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000074505806, |
| "reward_std": 0.28160068988800047, |
| "rewards/MultiModalAccuracyORM": 0.3000000074505806, |
| "step": 225, |
| "train_speed(iter/s)": 0.044113 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 39.383334040641785, |
| "epoch": 0.09292929292929293, |
| "grad_norm": 21.127038955688477, |
| "kl": 0.024017763137817384, |
| "learning_rate": 2e-07, |
| "loss": 0.02930714190006256, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666693985462, |
| "reward_std": 0.26196202635765076, |
| "rewards/MultiModalAccuracyORM": 0.1666666693985462, |
| "step": 230, |
| "train_speed(iter/s)": 0.044142 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.891666889190674, |
| "epoch": 0.09494949494949495, |
| "grad_norm": 6.2940568923950195, |
| "kl": 0.027823114395141603, |
| "learning_rate": 2e-07, |
| "loss": -0.009951599687337876, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000521540643, |
| "reward_std": 0.3036638140678406, |
| "rewards/MultiModalAccuracyORM": 0.17500000521540643, |
| "step": 235, |
| "train_speed(iter/s)": 0.044213 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.750000047683716, |
| "epoch": 0.09696969696969697, |
| "grad_norm": 3.980544090270996, |
| "kl": 0.018259000778198243, |
| "learning_rate": 2e-07, |
| "loss": -0.020673815906047822, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666667014360428, |
| "reward_std": 0.21823472976684571, |
| "rewards/MultiModalAccuracyORM": 0.16666667014360428, |
| "step": 240, |
| "train_speed(iter/s)": 0.044233 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.80000023841858, |
| "epoch": 0.09898989898989899, |
| "grad_norm": 1.3881502151489258, |
| "kl": 0.000605630874633789, |
| "learning_rate": 2e-07, |
| "loss": -0.01487920731306076, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334177732467, |
| "reward_std": 0.30661733746528624, |
| "rewards/MultiModalAccuracyORM": 0.28333334177732467, |
| "step": 245, |
| "train_speed(iter/s)": 0.044235 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 11.512455940246582, |
| "learning_rate": 2e-07, |
| "loss": 0.033054867386817934, |
| "memory(GiB)": 104.49, |
| "step": 250, |
| "train_speed(iter/s)": 0.044081 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 24.26333417892456, |
| "eval_kl": 0.022986836433410644, |
| "eval_loss": 0.027694934979081154, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.2150000040233135, |
| "eval_reward_std": 0.2852368396520615, |
| "eval_rewards/MultiModalAccuracyORM": 0.2150000040233135, |
| "eval_runtime": 262.2909, |
| "eval_samples_per_second": 0.191, |
| "eval_steps_per_second": 0.019, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 46.133334922790525, |
| "epoch": 0.10303030303030303, |
| "grad_norm": 4.130315780639648, |
| "kl": 0.018082523345947267, |
| "learning_rate": 2e-07, |
| "loss": 0.024475347995758057, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166666977107526, |
| "reward_std": 0.2766233593225479, |
| "rewards/MultiModalAccuracyORM": 0.24166666977107526, |
| "step": 255, |
| "train_speed(iter/s)": 0.041648 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.6, |
| "epoch": 0.10505050505050505, |
| "grad_norm": 10.52556324005127, |
| "kl": 0.020127105712890624, |
| "learning_rate": 2e-07, |
| "loss": -0.008974193781614303, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000037252903, |
| "reward_std": 0.2567190647125244, |
| "rewards/MultiModalAccuracyORM": 0.2750000037252903, |
| "step": 260, |
| "train_speed(iter/s)": 0.041738 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.45, |
| "epoch": 0.10707070707070707, |
| "grad_norm": 11.179485321044922, |
| "kl": 0.03880462646484375, |
| "learning_rate": 2e-07, |
| "loss": 0.0015405803918838502, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666716337204, |
| "reward_std": 0.2918527454137802, |
| "rewards/MultiModalAccuracyORM": 0.1666666716337204, |
| "step": 265, |
| "train_speed(iter/s)": 0.041756 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.75, |
| "epoch": 0.10909090909090909, |
| "grad_norm": 4.639992713928223, |
| "kl": 0.018306541442871093, |
| "learning_rate": 2e-07, |
| "loss": -0.012826296687126159, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333656191826, |
| "reward_std": 0.174764084815979, |
| "rewards/MultiModalAccuracyORM": 0.10833333656191826, |
| "step": 270, |
| "train_speed(iter/s)": 0.041759 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 117.6, |
| "epoch": 0.1111111111111111, |
| "grad_norm": 14.52376651763916, |
| "kl": 0.02277069091796875, |
| "learning_rate": 2e-07, |
| "loss": -0.03760814070701599, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.05, |
| "reward": 0.30833334028720855, |
| "reward_std": 0.3679845929145813, |
| "rewards/MultiModalAccuracyORM": 0.30833334028720855, |
| "step": 275, |
| "train_speed(iter/s)": 0.041762 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 41.5, |
| "epoch": 0.11313131313131314, |
| "grad_norm": 7.044532775878906, |
| "kl": 0.04247570037841797, |
| "learning_rate": 2e-07, |
| "loss": 0.05246252417564392, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000670552253, |
| "reward_std": 0.30385262966156007, |
| "rewards/MultiModalAccuracyORM": 0.22500000670552253, |
| "step": 280, |
| "train_speed(iter/s)": 0.041745 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 23.9, |
| "epoch": 0.11515151515151516, |
| "grad_norm": 3.5612969398498535, |
| "kl": 0.04666891098022461, |
| "learning_rate": 2e-07, |
| "loss": -0.03580006957054138, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166667312383652, |
| "reward_std": 0.20594746768474578, |
| "rewards/MultiModalAccuracyORM": 0.14166667312383652, |
| "step": 285, |
| "train_speed(iter/s)": 0.041805 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 57.5, |
| "epoch": 0.11717171717171718, |
| "grad_norm": 22.66056251525879, |
| "kl": 0.0072917938232421875, |
| "learning_rate": 2e-07, |
| "loss": 0.030799278616905214, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667386889457, |
| "reward_std": 0.3523798406124115, |
| "rewards/MultiModalAccuracyORM": 0.24166667386889457, |
| "step": 290, |
| "train_speed(iter/s)": 0.041794 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 64.1, |
| "epoch": 0.1191919191919192, |
| "grad_norm": 16.353897094726562, |
| "kl": 0.02278270721435547, |
| "learning_rate": 2e-07, |
| "loss": 0.0040659308433532715, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.07500000149011612, |
| "reward_std": 0.16200153529644012, |
| "rewards/MultiModalAccuracyORM": 0.07500000149011612, |
| "step": 295, |
| "train_speed(iter/s)": 0.041713 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.0, |
| "epoch": 0.12121212121212122, |
| "grad_norm": 3.0584208965301514, |
| "kl": 0.021613693237304686, |
| "learning_rate": 2e-07, |
| "loss": 0.015577539801597595, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000447034837, |
| "reward_std": 0.2175043046474457, |
| "rewards/MultiModalAccuracyORM": 0.12500000447034837, |
| "step": 300, |
| "train_speed(iter/s)": 0.041708 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.4, |
| "epoch": 0.12323232323232323, |
| "grad_norm": 2.683347225189209, |
| "kl": 0.05754499435424805, |
| "learning_rate": 2e-07, |
| "loss": 0.0014399250969290734, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333805203437, |
| "reward_std": 0.24637182354927062, |
| "rewards/MultiModalAccuracyORM": 0.13333333805203437, |
| "step": 305, |
| "train_speed(iter/s)": 0.041731 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.0, |
| "epoch": 0.12525252525252525, |
| "grad_norm": 4.011137008666992, |
| "kl": 0.003471851348876953, |
| "learning_rate": 2e-07, |
| "loss": -0.012657842040061951, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000223517418, |
| "reward_std": 0.17781037986278533, |
| "rewards/MultiModalAccuracyORM": 0.12500000223517418, |
| "step": 310, |
| "train_speed(iter/s)": 0.041745 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.4, |
| "epoch": 0.12727272727272726, |
| "grad_norm": 2.4296364784240723, |
| "kl": 0.01938905715942383, |
| "learning_rate": 2e-07, |
| "loss": 0.023499640822410583, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000149011613, |
| "reward_std": 0.1808116167783737, |
| "rewards/MultiModalAccuracyORM": 0.17500000149011613, |
| "step": 315, |
| "train_speed(iter/s)": 0.041811 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.35, |
| "epoch": 0.1292929292929293, |
| "grad_norm": 1.5319490432739258, |
| "kl": 0.023272895812988283, |
| "learning_rate": 2e-07, |
| "loss": -0.0005661348812282085, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000298023223, |
| "reward_std": 0.23860624432563782, |
| "rewards/MultiModalAccuracyORM": 0.22500000298023223, |
| "step": 320, |
| "train_speed(iter/s)": 0.041846 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.8, |
| "epoch": 0.13131313131313133, |
| "grad_norm": 28.09259605407715, |
| "kl": 0.055776214599609374, |
| "learning_rate": 2e-07, |
| "loss": -0.00978400707244873, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333507180214, |
| "reward_std": 0.2785158395767212, |
| "rewards/MultiModalAccuracyORM": 0.15833333507180214, |
| "step": 325, |
| "train_speed(iter/s)": 0.041894 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.8, |
| "epoch": 0.13333333333333333, |
| "grad_norm": 5.655847072601318, |
| "kl": 0.01194305419921875, |
| "learning_rate": 2e-07, |
| "loss": -0.023021923005580903, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000037252903, |
| "reward_std": 0.242361918091774, |
| "rewards/MultiModalAccuracyORM": 0.2250000037252903, |
| "step": 330, |
| "train_speed(iter/s)": 0.041922 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.0, |
| "epoch": 0.13535353535353536, |
| "grad_norm": 16.269479751586914, |
| "kl": 0.012023067474365235, |
| "learning_rate": 2e-07, |
| "loss": 0.009542696177959442, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333380520344, |
| "reward_std": 0.4074155628681183, |
| "rewards/MultiModalAccuracyORM": 0.2583333380520344, |
| "step": 335, |
| "train_speed(iter/s)": 0.041926 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.1, |
| "epoch": 0.13737373737373737, |
| "grad_norm": 19.7489013671875, |
| "kl": 0.041985511779785156, |
| "learning_rate": 2e-07, |
| "loss": -0.009631294012069701, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000111758709, |
| "reward_std": 0.38227055966854095, |
| "rewards/MultiModalAccuracyORM": 0.3250000111758709, |
| "step": 340, |
| "train_speed(iter/s)": 0.042003 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.25, |
| "epoch": 0.1393939393939394, |
| "grad_norm": 25.704818725585938, |
| "kl": 0.02933082580566406, |
| "learning_rate": 2e-07, |
| "loss": 0.005663518235087395, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000596046447, |
| "reward_std": 0.287842845916748, |
| "rewards/MultiModalAccuracyORM": 0.17500000596046447, |
| "step": 345, |
| "train_speed(iter/s)": 0.042012 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.0, |
| "epoch": 0.1414141414141414, |
| "grad_norm": 30.1114559173584, |
| "kl": 0.010479164123535157, |
| "learning_rate": 2e-07, |
| "loss": 0.018732863664627075, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000447034837, |
| "reward_std": 0.2077010989189148, |
| "rewards/MultiModalAccuracyORM": 0.12500000447034837, |
| "step": 350, |
| "train_speed(iter/s)": 0.041986 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.65, |
| "epoch": 0.14343434343434344, |
| "grad_norm": 4.131731033325195, |
| "kl": 0.03218498229980469, |
| "learning_rate": 2e-07, |
| "loss": 0.05048830509185791, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333380520344, |
| "reward_std": 0.22854881286621093, |
| "rewards/MultiModalAccuracyORM": 0.1833333380520344, |
| "step": 355, |
| "train_speed(iter/s)": 0.041992 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.9, |
| "epoch": 0.14545454545454545, |
| "grad_norm": 2.5443966388702393, |
| "kl": 0.028252887725830077, |
| "learning_rate": 2e-07, |
| "loss": 0.011212460696697235, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666813194752, |
| "reward_std": 0.3104326128959656, |
| "rewards/MultiModalAccuracyORM": 0.3166666813194752, |
| "step": 360, |
| "train_speed(iter/s)": 0.042049 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 52.05, |
| "epoch": 0.14747474747474748, |
| "grad_norm": 4.374809265136719, |
| "kl": 0.024268913269042968, |
| "learning_rate": 2e-07, |
| "loss": -0.0001811852096579969, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000447034836, |
| "reward_std": 0.20544483065605162, |
| "rewards/MultiModalAccuracyORM": 0.15000000447034836, |
| "step": 365, |
| "train_speed(iter/s)": 0.042035 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.75, |
| "epoch": 0.1494949494949495, |
| "grad_norm": 16.779956817626953, |
| "kl": 0.015867042541503906, |
| "learning_rate": 2e-07, |
| "loss": 0.022855284810066222, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000447034836, |
| "reward_std": 0.17529989182949066, |
| "rewards/MultiModalAccuracyORM": 0.17500000447034836, |
| "step": 370, |
| "train_speed(iter/s)": 0.042043 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.05, |
| "epoch": 0.15151515151515152, |
| "grad_norm": 1.799055576324463, |
| "kl": 0.02576103210449219, |
| "learning_rate": 2e-07, |
| "loss": 0.03886341452598572, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000670552255, |
| "reward_std": 0.23481498062610626, |
| "rewards/MultiModalAccuracyORM": 0.15000000670552255, |
| "step": 375, |
| "train_speed(iter/s)": 0.041993 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.3, |
| "epoch": 0.15353535353535352, |
| "grad_norm": 14.809004783630371, |
| "kl": 0.06607561111450196, |
| "learning_rate": 2e-07, |
| "loss": 0.02258915901184082, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.27759079039096834, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 380, |
| "train_speed(iter/s)": 0.042034 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.6, |
| "epoch": 0.15555555555555556, |
| "grad_norm": 4.855790138244629, |
| "kl": 0.044758033752441403, |
| "learning_rate": 2e-07, |
| "loss": 0.006666116416454315, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666731238365, |
| "reward_std": 0.2877832442522049, |
| "rewards/MultiModalAccuracyORM": 0.1916666731238365, |
| "step": 385, |
| "train_speed(iter/s)": 0.042053 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.45, |
| "epoch": 0.15757575757575756, |
| "grad_norm": 3.650961399078369, |
| "kl": 0.09126663208007812, |
| "learning_rate": 2e-07, |
| "loss": -0.006338779628276825, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000014901161, |
| "reward_std": 0.22384165227413177, |
| "rewards/MultiModalAccuracyORM": 0.2750000014901161, |
| "step": 390, |
| "train_speed(iter/s)": 0.04209 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.7, |
| "epoch": 0.1595959595959596, |
| "grad_norm": 22.398860931396484, |
| "kl": 0.05564393997192383, |
| "learning_rate": 2e-07, |
| "loss": 0.011527793109416961, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.3385071337223053, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 395, |
| "train_speed(iter/s)": 0.04213 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.45, |
| "epoch": 0.16161616161616163, |
| "grad_norm": 3.777151346206665, |
| "kl": 0.08077354431152343, |
| "learning_rate": 2e-07, |
| "loss": 0.02410067617893219, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666666939854622, |
| "reward_std": 0.2687189429998398, |
| "rewards/MultiModalAccuracyORM": 0.11666666939854622, |
| "step": 400, |
| "train_speed(iter/s)": 0.04213 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.7, |
| "epoch": 0.16363636363636364, |
| "grad_norm": 6.114872455596924, |
| "kl": 0.09431419372558594, |
| "learning_rate": 2e-07, |
| "loss": 0.02062232345342636, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333410322666, |
| "reward_std": 0.384308198094368, |
| "rewards/MultiModalAccuracyORM": 0.2833333410322666, |
| "step": 405, |
| "train_speed(iter/s)": 0.042217 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.1, |
| "epoch": 0.16565656565656567, |
| "grad_norm": 2.8733115196228027, |
| "kl": 0.07746734619140624, |
| "learning_rate": 2e-07, |
| "loss": 0.014683787524700165, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000074505806, |
| "reward_std": 0.28160068988800047, |
| "rewards/MultiModalAccuracyORM": 0.3500000074505806, |
| "step": 410, |
| "train_speed(iter/s)": 0.042237 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.55, |
| "epoch": 0.16767676767676767, |
| "grad_norm": 1.103491187095642, |
| "kl": 0.013630294799804687, |
| "learning_rate": 2e-07, |
| "loss": 0.031570857763290404, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.3222051203250885, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 415, |
| "train_speed(iter/s)": 0.042253 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.65, |
| "epoch": 0.1696969696969697, |
| "grad_norm": 19.609107971191406, |
| "kl": 0.006585693359375, |
| "learning_rate": 2e-07, |
| "loss": 0.029933744668960573, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166666865348817, |
| "reward_std": 0.2815766781568527, |
| "rewards/MultiModalAccuracyORM": 0.29166666865348817, |
| "step": 420, |
| "train_speed(iter/s)": 0.042267 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.85, |
| "epoch": 0.1717171717171717, |
| "grad_norm": 3.5567312240600586, |
| "kl": 0.027184486389160156, |
| "learning_rate": 2e-07, |
| "loss": -0.008297288417816162, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000067055225, |
| "reward_std": 0.3423224091529846, |
| "rewards/MultiModalAccuracyORM": 0.3000000067055225, |
| "step": 425, |
| "train_speed(iter/s)": 0.042268 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 40.6, |
| "epoch": 0.17373737373737375, |
| "grad_norm": 4.005617141723633, |
| "kl": 0.037563323974609375, |
| "learning_rate": 2e-07, |
| "loss": -0.008759691566228866, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000298023223, |
| "reward_std": 0.2403598755598068, |
| "rewards/MultiModalAccuracyORM": 0.22500000298023223, |
| "step": 430, |
| "train_speed(iter/s)": 0.042273 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.65, |
| "epoch": 0.17575757575757575, |
| "grad_norm": 1.1876083612442017, |
| "kl": 0.04276580810546875, |
| "learning_rate": 2e-07, |
| "loss": 0.009293363988399505, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.25639069378376006, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 435, |
| "train_speed(iter/s)": 0.042306 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.1, |
| "epoch": 0.17777777777777778, |
| "grad_norm": 1.259384274482727, |
| "kl": 0.09014434814453125, |
| "learning_rate": 2e-07, |
| "loss": 0.07308403849601745, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333373069763, |
| "reward_std": 0.2925831705331802, |
| "rewards/MultiModalAccuracyORM": 0.2083333373069763, |
| "step": 440, |
| "train_speed(iter/s)": 0.042352 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.1, |
| "epoch": 0.1797979797979798, |
| "grad_norm": 1.2361171245574951, |
| "kl": 0.0314971923828125, |
| "learning_rate": 2e-07, |
| "loss": -0.04375269114971161, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833333656191825, |
| "reward_std": 0.29863070249557494, |
| "rewards/MultiModalAccuracyORM": 0.30833333656191825, |
| "step": 445, |
| "train_speed(iter/s)": 0.042392 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.65, |
| "epoch": 0.18181818181818182, |
| "grad_norm": 2.4363491535186768, |
| "kl": 0.07178993225097656, |
| "learning_rate": 2e-07, |
| "loss": 0.0028454601764678956, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.41791602969169617, |
| "rewards/MultiModalAccuracyORM": 0.32500000596046447, |
| "step": 450, |
| "train_speed(iter/s)": 0.042441 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.6, |
| "epoch": 0.18383838383838383, |
| "grad_norm": 12.971217155456543, |
| "kl": 0.05601959228515625, |
| "learning_rate": 2e-07, |
| "loss": 0.012572245299816131, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.22704698145389557, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 455, |
| "train_speed(iter/s)": 0.042477 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.85, |
| "epoch": 0.18585858585858586, |
| "grad_norm": 11.262785911560059, |
| "kl": 0.014653778076171875, |
| "learning_rate": 2e-07, |
| "loss": 0.005643188953399658, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000022351742, |
| "reward_std": 0.26040059328079224, |
| "rewards/MultiModalAccuracyORM": 0.2750000022351742, |
| "step": 460, |
| "train_speed(iter/s)": 0.042456 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.2, |
| "epoch": 0.18787878787878787, |
| "grad_norm": 9.14407730102539, |
| "kl": 0.03995361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.0012056897394359112, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.3923635810613632, |
| "rewards/MultiModalAccuracyORM": 0.22500000521540642, |
| "step": 465, |
| "train_speed(iter/s)": 0.042452 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.2, |
| "epoch": 0.1898989898989899, |
| "grad_norm": 2.3540585041046143, |
| "kl": 0.041180419921875, |
| "learning_rate": 2e-07, |
| "loss": 0.018683533370494842, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.20722824335098267, |
| "rewards/MultiModalAccuracyORM": 0.10000000149011612, |
| "step": 470, |
| "train_speed(iter/s)": 0.042503 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 27.25, |
| "epoch": 0.1919191919191919, |
| "grad_norm": 6.397303581237793, |
| "kl": 0.02938995361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.005294787883758545, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.308333333581686, |
| "reward_std": 0.31422091126441953, |
| "rewards/MultiModalAccuracyORM": 0.308333333581686, |
| "step": 475, |
| "train_speed(iter/s)": 0.042517 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.45, |
| "epoch": 0.19393939393939394, |
| "grad_norm": 15.569790840148926, |
| "kl": 0.07780342102050782, |
| "learning_rate": 2e-07, |
| "loss": 0.012630045413970947, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.36667739152908324, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 480, |
| "train_speed(iter/s)": 0.042512 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.95, |
| "epoch": 0.19595959595959597, |
| "grad_norm": 12.205713272094727, |
| "kl": 0.02214508056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.012730973958969116, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333656191824, |
| "reward_std": 0.25566026866436004, |
| "rewards/MultiModalAccuracyORM": 0.35833333656191824, |
| "step": 485, |
| "train_speed(iter/s)": 0.042552 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.55, |
| "epoch": 0.19797979797979798, |
| "grad_norm": 0.97981858253479, |
| "kl": 0.05444526672363281, |
| "learning_rate": 2e-07, |
| "loss": 0.006719142198562622, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10000000074505806, |
| "reward_std": 0.203472563624382, |
| "rewards/MultiModalAccuracyORM": 0.10000000074505806, |
| "step": 490, |
| "train_speed(iter/s)": 0.04257 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.85, |
| "epoch": 0.2, |
| "grad_norm": 2.1149213314056396, |
| "kl": 0.06137847900390625, |
| "learning_rate": 2e-07, |
| "loss": 0.04113571047782898, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334401249886, |
| "reward_std": 0.3259632259607315, |
| "rewards/MultiModalAccuracyORM": 0.38333334401249886, |
| "step": 495, |
| "train_speed(iter/s)": 0.042559 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 18.28374671936035, |
| "learning_rate": 2e-07, |
| "loss": 0.0038329623639583588, |
| "memory(GiB)": 104.49, |
| "step": 500, |
| "train_speed(iter/s)": 0.042571 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 26.648334164619445, |
| "eval_kl": 0.08782589912414551, |
| "eval_loss": 7.593631835334236e-06, |
| "eval_response_clip_ratio": 0.001666666716337204, |
| "eval_reward": 0.2816666740179062, |
| "eval_reward_std": 0.3331107318401337, |
| "eval_rewards/MultiModalAccuracyORM": 0.2816666740179062, |
| "eval_runtime": 274.2098, |
| "eval_samples_per_second": 0.182, |
| "eval_steps_per_second": 0.018, |
| "step": 500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.25, |
| "epoch": 0.20404040404040405, |
| "grad_norm": 6.910037517547607, |
| "kl": 0.07545309066772461, |
| "learning_rate": 2e-07, |
| "loss": 0.02395549863576889, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30416667498648164, |
| "reward_std": 0.2502841353416443, |
| "rewards/MultiModalAccuracyORM": 0.30416667498648164, |
| "step": 505, |
| "train_speed(iter/s)": 0.041389 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.3, |
| "epoch": 0.20606060606060606, |
| "grad_norm": 7.303215503692627, |
| "kl": 0.03816680908203125, |
| "learning_rate": 2e-07, |
| "loss": 0.012394474446773529, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.20363159775733947, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 510, |
| "train_speed(iter/s)": 0.041415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 37.0, |
| "epoch": 0.2080808080808081, |
| "grad_norm": 2.0224409103393555, |
| "kl": 0.038478851318359375, |
| "learning_rate": 2e-07, |
| "loss": -0.017507487535476686, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333387970924, |
| "reward_std": 0.37155145704746245, |
| "rewards/MultiModalAccuracyORM": 0.2583333387970924, |
| "step": 515, |
| "train_speed(iter/s)": 0.041435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.9, |
| "epoch": 0.2101010101010101, |
| "grad_norm": 9.651928901672363, |
| "kl": 0.00984039306640625, |
| "learning_rate": 2e-07, |
| "loss": -0.002422221563756466, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000014901161, |
| "reward_std": 0.30187162160873415, |
| "rewards/MultiModalAccuracyORM": 0.3500000014901161, |
| "step": 520, |
| "train_speed(iter/s)": 0.041478 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.75, |
| "epoch": 0.21212121212121213, |
| "grad_norm": 5.6520562171936035, |
| "kl": 0.031005859375, |
| "learning_rate": 2e-07, |
| "loss": 0.00025533935986459254, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.2338038921356201, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 525, |
| "train_speed(iter/s)": 0.041505 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.5, |
| "epoch": 0.21414141414141413, |
| "grad_norm": 20.748729705810547, |
| "kl": 0.0915985107421875, |
| "learning_rate": 2e-07, |
| "loss": -0.01767445057630539, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000096857548, |
| "reward_std": 0.38835368156433103, |
| "rewards/MultiModalAccuracyORM": 0.3000000096857548, |
| "step": 530, |
| "train_speed(iter/s)": 0.041524 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.45, |
| "epoch": 0.21616161616161617, |
| "grad_norm": 0.023180894553661346, |
| "kl": 0.07088775634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.029787826538085937, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000521540644, |
| "reward_std": 0.2526735752820969, |
| "rewards/MultiModalAccuracyORM": 0.37500000521540644, |
| "step": 535, |
| "train_speed(iter/s)": 0.041552 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.2, |
| "epoch": 0.21818181818181817, |
| "grad_norm": 16.621583938598633, |
| "kl": 0.05093994140625, |
| "learning_rate": 2e-07, |
| "loss": -0.009274721145629883, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.28561058938503264, |
| "rewards/MultiModalAccuracyORM": 0.29166667312383654, |
| "step": 540, |
| "train_speed(iter/s)": 0.041581 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.45, |
| "epoch": 0.2202020202020202, |
| "grad_norm": 17.103206634521484, |
| "kl": 0.0737823486328125, |
| "learning_rate": 2e-07, |
| "loss": 0.021037888526916505, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333432674408, |
| "reward_std": 0.28561058938503264, |
| "rewards/MultiModalAccuracyORM": 0.3583333432674408, |
| "step": 545, |
| "train_speed(iter/s)": 0.041645 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 28.85, |
| "epoch": 0.2222222222222222, |
| "grad_norm": 1.5227787494659424, |
| "kl": 0.07874641418457032, |
| "learning_rate": 2e-07, |
| "loss": 0.00487855076789856, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000223517417, |
| "reward_std": 0.21779412031173706, |
| "rewards/MultiModalAccuracyORM": 0.17500000223517417, |
| "step": 550, |
| "train_speed(iter/s)": 0.041506 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.6, |
| "epoch": 0.22424242424242424, |
| "grad_norm": 13.277663230895996, |
| "kl": 0.039247894287109376, |
| "learning_rate": 2e-07, |
| "loss": 0.008411864936351775, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333656191826, |
| "reward_std": 0.29784068167209626, |
| "rewards/MultiModalAccuracyORM": 0.13333333656191826, |
| "step": 555, |
| "train_speed(iter/s)": 0.041504 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.2, |
| "epoch": 0.22626262626262628, |
| "grad_norm": 0.10883937031030655, |
| "kl": 0.06273307800292968, |
| "learning_rate": 2e-07, |
| "loss": 0.012170317023992539, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666679084301, |
| "reward_std": 0.12333081662654877, |
| "rewards/MultiModalAccuracyORM": 0.2666666679084301, |
| "step": 560, |
| "train_speed(iter/s)": 0.041519 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.5, |
| "epoch": 0.22828282828282828, |
| "grad_norm": 12.209307670593262, |
| "kl": 0.04704780578613281, |
| "learning_rate": 2e-07, |
| "loss": 0.032337296009063723, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000074505806, |
| "reward_std": 0.2626924514770508, |
| "rewards/MultiModalAccuracyORM": 0.17500000074505806, |
| "step": 565, |
| "train_speed(iter/s)": 0.04155 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.85, |
| "epoch": 0.23030303030303031, |
| "grad_norm": 4.45810079574585, |
| "kl": 0.05213623046875, |
| "learning_rate": 2e-07, |
| "loss": 0.001686885952949524, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000447034836, |
| "reward_std": 0.25891573131084444, |
| "rewards/MultiModalAccuracyORM": 0.30000000447034836, |
| "step": 570, |
| "train_speed(iter/s)": 0.041578 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 65.55, |
| "epoch": 0.23232323232323232, |
| "grad_norm": 0.6504287719726562, |
| "kl": 0.08351707458496094, |
| "learning_rate": 2e-07, |
| "loss": 0.016631042957305907, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000014901161, |
| "reward_std": 0.26906835436820986, |
| "rewards/MultiModalAccuracyORM": 0.1500000014901161, |
| "step": 575, |
| "train_speed(iter/s)": 0.041538 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.85, |
| "epoch": 0.23434343434343435, |
| "grad_norm": 27.585575103759766, |
| "kl": 0.1207763671875, |
| "learning_rate": 2e-07, |
| "loss": -0.036790531873703, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000067055225, |
| "reward_std": 0.3860618233680725, |
| "rewards/MultiModalAccuracyORM": 0.3000000067055225, |
| "step": 580, |
| "train_speed(iter/s)": 0.041563 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.3, |
| "epoch": 0.23636363636363636, |
| "grad_norm": 10.094830513000488, |
| "kl": 0.04735574722290039, |
| "learning_rate": 2e-07, |
| "loss": 0.008206900209188461, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667461395265, |
| "reward_std": 0.23631438612937927, |
| "rewards/MultiModalAccuracyORM": 0.19166667461395265, |
| "step": 585, |
| "train_speed(iter/s)": 0.041593 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 127.6, |
| "epoch": 0.2383838383838384, |
| "grad_norm": 3.5195720195770264, |
| "kl": 0.03963155746459961, |
| "learning_rate": 2e-07, |
| "loss": 0.027892309427261352, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.05, |
| "reward": 0.22500000521540642, |
| "reward_std": 0.22224706113338472, |
| "rewards/MultiModalAccuracyORM": 0.22500000521540642, |
| "step": 590, |
| "train_speed(iter/s)": 0.041543 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.75, |
| "epoch": 0.2404040404040404, |
| "grad_norm": 12.612972259521484, |
| "kl": 0.0610992431640625, |
| "learning_rate": 2e-07, |
| "loss": -0.022297632694244385, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000089406967, |
| "reward_std": 0.33376438319683077, |
| "rewards/MultiModalAccuracyORM": 0.4000000089406967, |
| "step": 595, |
| "train_speed(iter/s)": 0.041563 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.75, |
| "epoch": 0.24242424242424243, |
| "grad_norm": 1.1488845348358154, |
| "kl": 0.06821136474609375, |
| "learning_rate": 2e-07, |
| "loss": 0.03176195621490478, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000074505805, |
| "reward_std": 0.3149157464504242, |
| "rewards/MultiModalAccuracyORM": 0.22500000074505805, |
| "step": 600, |
| "train_speed(iter/s)": 0.041549 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.5, |
| "epoch": 0.24444444444444444, |
| "grad_norm": 4.132078170776367, |
| "kl": 0.07441596984863282, |
| "learning_rate": 2e-07, |
| "loss": 0.004773074015974999, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333879709244, |
| "reward_std": 0.2323044866323471, |
| "rewards/MultiModalAccuracyORM": 0.13333333879709244, |
| "step": 605, |
| "train_speed(iter/s)": 0.041583 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.85, |
| "epoch": 0.24646464646464647, |
| "grad_norm": 3.0928878784179688, |
| "kl": 0.050506591796875, |
| "learning_rate": 2e-07, |
| "loss": 0.0011304418556392192, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000670552255, |
| "reward_std": 0.31520852744579314, |
| "rewards/MultiModalAccuracyORM": 0.27500000670552255, |
| "step": 610, |
| "train_speed(iter/s)": 0.041586 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.45, |
| "epoch": 0.24848484848484848, |
| "grad_norm": 13.133064270019531, |
| "kl": 0.05210723876953125, |
| "learning_rate": 2e-07, |
| "loss": -0.009364684671163559, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833334028720857, |
| "reward_std": 0.28003925681114195, |
| "rewards/MultiModalAccuracyORM": 0.20833334028720857, |
| "step": 615, |
| "train_speed(iter/s)": 0.041615 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.05, |
| "epoch": 0.2505050505050505, |
| "grad_norm": 21.168598175048828, |
| "kl": 0.06778411865234375, |
| "learning_rate": 2e-07, |
| "loss": -0.006833799928426742, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000096857548, |
| "reward_std": 0.330559054017067, |
| "rewards/MultiModalAccuracyORM": 0.3000000096857548, |
| "step": 620, |
| "train_speed(iter/s)": 0.041639 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.35, |
| "epoch": 0.25252525252525254, |
| "grad_norm": 16.575620651245117, |
| "kl": 0.05116090774536133, |
| "learning_rate": 2e-07, |
| "loss": -0.016651205718517303, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000074505806, |
| "reward_std": 0.28859728276729585, |
| "rewards/MultiModalAccuracyORM": 0.2250000074505806, |
| "step": 625, |
| "train_speed(iter/s)": 0.041672 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.7, |
| "epoch": 0.2545454545454545, |
| "grad_norm": 3.503321886062622, |
| "kl": 0.0628082275390625, |
| "learning_rate": 2e-07, |
| "loss": -0.008116110414266586, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.2330589234828949, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 630, |
| "train_speed(iter/s)": 0.041685 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.0, |
| "epoch": 0.25656565656565655, |
| "grad_norm": 15.203675270080566, |
| "kl": 0.06846466064453124, |
| "learning_rate": 2e-07, |
| "loss": 0.011408740282058715, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334028720856, |
| "reward_std": 0.3008869707584381, |
| "rewards/MultiModalAccuracyORM": 0.38333334028720856, |
| "step": 635, |
| "train_speed(iter/s)": 0.04173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.05, |
| "epoch": 0.2585858585858586, |
| "grad_norm": 32.77607727050781, |
| "kl": 0.12814788818359374, |
| "learning_rate": 2e-07, |
| "loss": -0.0371063232421875, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000081956387, |
| "reward_std": 0.36673698723316195, |
| "rewards/MultiModalAccuracyORM": 0.3250000081956387, |
| "step": 640, |
| "train_speed(iter/s)": 0.041758 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.0, |
| "epoch": 0.2606060606060606, |
| "grad_norm": 15.344500541687012, |
| "kl": 0.105792236328125, |
| "learning_rate": 2e-07, |
| "loss": -0.006553761661052704, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333507180213, |
| "reward_std": 0.2597057640552521, |
| "rewards/MultiModalAccuracyORM": 0.20833333507180213, |
| "step": 645, |
| "train_speed(iter/s)": 0.041814 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 46.8, |
| "epoch": 0.26262626262626265, |
| "grad_norm": 16.03054428100586, |
| "kl": 0.04459686279296875, |
| "learning_rate": 2e-07, |
| "loss": 0.036105594038963316, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000298023223, |
| "reward_std": 0.2403598755598068, |
| "rewards/MultiModalAccuracyORM": 0.22500000298023223, |
| "step": 650, |
| "train_speed(iter/s)": 0.041793 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.05, |
| "epoch": 0.26464646464646463, |
| "grad_norm": 17.309656143188477, |
| "kl": 0.11004905700683594, |
| "learning_rate": 2e-07, |
| "loss": 0.017519061267375947, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.3682032287120819, |
| "rewards/MultiModalAccuracyORM": 0.2083333395421505, |
| "step": 655, |
| "train_speed(iter/s)": 0.04181 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.3, |
| "epoch": 0.26666666666666666, |
| "grad_norm": 4.0642170906066895, |
| "kl": 0.054970169067382814, |
| "learning_rate": 2e-07, |
| "loss": -0.008081305027008056, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333879709244, |
| "reward_std": 0.23230449259281158, |
| "rewards/MultiModalAccuracyORM": 0.28333333879709244, |
| "step": 660, |
| "train_speed(iter/s)": 0.041838 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 45.25, |
| "epoch": 0.2686868686868687, |
| "grad_norm": 7.022747993469238, |
| "kl": 0.10093574523925782, |
| "learning_rate": 2e-07, |
| "loss": 0.027714025974273682, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.14166666865348815, |
| "reward_std": 0.2531497746706009, |
| "rewards/MultiModalAccuracyORM": 0.14166666865348815, |
| "step": 665, |
| "train_speed(iter/s)": 0.041812 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.75, |
| "epoch": 0.27070707070707073, |
| "grad_norm": 9.984959602355957, |
| "kl": 0.023084259033203124, |
| "learning_rate": 2e-07, |
| "loss": 0.026220232248306274, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.20661829113960267, |
| "rewards/MultiModalAccuracyORM": 0.10000000149011612, |
| "step": 670, |
| "train_speed(iter/s)": 0.041806 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 38.05, |
| "epoch": 0.2727272727272727, |
| "grad_norm": 7.702730178833008, |
| "kl": 0.16024627685546874, |
| "learning_rate": 2e-07, |
| "loss": -0.049201831221580505, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000670552254, |
| "reward_std": 0.20817729830741882, |
| "rewards/MultiModalAccuracyORM": 0.32500000670552254, |
| "step": 675, |
| "train_speed(iter/s)": 0.041821 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.9, |
| "epoch": 0.27474747474747474, |
| "grad_norm": 0.16480083763599396, |
| "kl": 0.03549041748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.006150122731924057, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333432674409, |
| "reward_std": 0.2323400765657425, |
| "rewards/MultiModalAccuracyORM": 0.20833333432674409, |
| "step": 680, |
| "train_speed(iter/s)": 0.041846 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.8, |
| "epoch": 0.2767676767676768, |
| "grad_norm": 0.027387158945202827, |
| "kl": 0.10235595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.02902156114578247, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15833333507180214, |
| "reward_std": 0.13583914041519166, |
| "rewards/MultiModalAccuracyORM": 0.15833333507180214, |
| "step": 685, |
| "train_speed(iter/s)": 0.041856 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.5, |
| "epoch": 0.2787878787878788, |
| "grad_norm": 6.602695465087891, |
| "kl": 0.0608123779296875, |
| "learning_rate": 2e-07, |
| "loss": 0.012946502864360809, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667312383653, |
| "reward_std": 0.33300994634628295, |
| "rewards/MultiModalAccuracyORM": 0.21666667312383653, |
| "step": 690, |
| "train_speed(iter/s)": 0.04186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.4, |
| "epoch": 0.2808080808080808, |
| "grad_norm": 3.4819886684417725, |
| "kl": 0.12022647857666016, |
| "learning_rate": 2e-07, |
| "loss": 0.02661624550819397, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000298023224, |
| "reward_std": 0.1981794685125351, |
| "rewards/MultiModalAccuracyORM": 0.20000000298023224, |
| "step": 695, |
| "train_speed(iter/s)": 0.041875 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.45, |
| "epoch": 0.2828282828282828, |
| "grad_norm": 9.789923667907715, |
| "kl": 0.06219940185546875, |
| "learning_rate": 2e-07, |
| "loss": -0.0169070765376091, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.31451369225978854, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 700, |
| "train_speed(iter/s)": 0.041904 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.7, |
| "epoch": 0.28484848484848485, |
| "grad_norm": 4.8883514404296875, |
| "kl": 0.0865386962890625, |
| "learning_rate": 2e-07, |
| "loss": -0.01697884649038315, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667312383654, |
| "reward_std": 0.37380772531032563, |
| "rewards/MultiModalAccuracyORM": 0.41666667312383654, |
| "step": 705, |
| "train_speed(iter/s)": 0.041918 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.65, |
| "epoch": 0.2868686868686869, |
| "grad_norm": 0.24715355038642883, |
| "kl": 0.1329193115234375, |
| "learning_rate": 2e-07, |
| "loss": 0.030154657363891602, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666746139526, |
| "reward_std": 0.1888910174369812, |
| "rewards/MultiModalAccuracyORM": 0.2916666746139526, |
| "step": 710, |
| "train_speed(iter/s)": 0.041945 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.05, |
| "epoch": 0.28888888888888886, |
| "grad_norm": 20.6412296295166, |
| "kl": 0.0775299072265625, |
| "learning_rate": 2e-07, |
| "loss": 0.010814273357391357, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666731238365, |
| "reward_std": 0.23236408829689026, |
| "rewards/MultiModalAccuracyORM": 0.3666666731238365, |
| "step": 715, |
| "train_speed(iter/s)": 0.041844 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 67.7, |
| "epoch": 0.2909090909090909, |
| "grad_norm": 19.74690055847168, |
| "kl": 0.0287322998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.011786083877086639, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.3597048044204712, |
| "rewards/MultiModalAccuracyORM": 0.3916666753590107, |
| "step": 720, |
| "train_speed(iter/s)": 0.041856 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.95, |
| "epoch": 0.29292929292929293, |
| "grad_norm": 12.01062297821045, |
| "kl": 0.0283416748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.030677640438079835, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333447575569, |
| "reward_std": 0.37494559586048126, |
| "rewards/MultiModalAccuracyORM": 0.3083333447575569, |
| "step": 725, |
| "train_speed(iter/s)": 0.041837 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.1, |
| "epoch": 0.29494949494949496, |
| "grad_norm": 18.26583480834961, |
| "kl": 0.048813819885253906, |
| "learning_rate": 2e-07, |
| "loss": 0.00018847386818379163, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000298023225, |
| "reward_std": 0.2855865776538849, |
| "rewards/MultiModalAccuracyORM": 0.15000000298023225, |
| "step": 730, |
| "train_speed(iter/s)": 0.041864 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.2, |
| "epoch": 0.296969696969697, |
| "grad_norm": 23.585920333862305, |
| "kl": 0.10856704711914063, |
| "learning_rate": 2e-07, |
| "loss": -0.010623668134212495, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.2486636757850647, |
| "rewards/MultiModalAccuracyORM": 0.2833333402872086, |
| "step": 735, |
| "train_speed(iter/s)": 0.041867 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 36.85, |
| "epoch": 0.298989898989899, |
| "grad_norm": 13.779229164123535, |
| "kl": 0.16164474487304686, |
| "learning_rate": 2e-07, |
| "loss": 0.09003554582595825, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000096857548, |
| "reward_std": 0.3144781023263931, |
| "rewards/MultiModalAccuracyORM": 0.3500000096857548, |
| "step": 740, |
| "train_speed(iter/s)": 0.041866 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.1, |
| "epoch": 0.301010101010101, |
| "grad_norm": 5.112743377685547, |
| "kl": 0.06104888916015625, |
| "learning_rate": 2e-07, |
| "loss": 0.006612183898687363, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333730697633, |
| "reward_std": 0.24261614382267, |
| "rewards/MultiModalAccuracyORM": 0.13333333730697633, |
| "step": 745, |
| "train_speed(iter/s)": 0.041874 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 3.3870651721954346, |
| "learning_rate": 2e-07, |
| "loss": 0.007025846093893051, |
| "memory(GiB)": 104.49, |
| "step": 750, |
| "train_speed(iter/s)": 0.041879 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 26.371667375564574, |
| "eval_kl": 0.08423469543457031, |
| "eval_loss": 0.020288411527872086, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.3050000049173832, |
| "eval_reward_std": 0.28924588978290555, |
| "eval_rewards/MultiModalAccuracyORM": 0.3050000049173832, |
| "eval_runtime": 257.2173, |
| "eval_samples_per_second": 0.194, |
| "eval_steps_per_second": 0.019, |
| "step": 750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.575, |
| "epoch": 0.30505050505050507, |
| "grad_norm": 3.0410096645355225, |
| "kl": 0.09359779357910156, |
| "learning_rate": 2e-07, |
| "loss": 0.01778276413679123, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20416667126119137, |
| "reward_std": 0.21572377979755403, |
| "rewards/MultiModalAccuracyORM": 0.20416667126119137, |
| "step": 755, |
| "train_speed(iter/s)": 0.041122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.25, |
| "epoch": 0.30707070707070705, |
| "grad_norm": 13.25398063659668, |
| "kl": 0.0601959228515625, |
| "learning_rate": 2e-07, |
| "loss": -0.023943953216075897, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666666939854622, |
| "reward_std": 0.24010565578937532, |
| "rewards/MultiModalAccuracyORM": 0.11666666939854622, |
| "step": 760, |
| "train_speed(iter/s)": 0.041142 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.05, |
| "epoch": 0.3090909090909091, |
| "grad_norm": 0.06504862755537033, |
| "kl": 0.0304901123046875, |
| "learning_rate": 2e-07, |
| "loss": -0.007498346269130707, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333387970924, |
| "reward_std": 0.3021644026041031, |
| "rewards/MultiModalAccuracyORM": 0.3833333387970924, |
| "step": 765, |
| "train_speed(iter/s)": 0.041185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 50.95, |
| "epoch": 0.3111111111111111, |
| "grad_norm": 18.189159393310547, |
| "kl": 0.0461090087890625, |
| "learning_rate": 2e-07, |
| "loss": -0.0027750393375754355, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.29709570705890653, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 770, |
| "train_speed(iter/s)": 0.041169 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.85, |
| "epoch": 0.31313131313131315, |
| "grad_norm": 0.3038291931152344, |
| "kl": 0.03930206298828125, |
| "learning_rate": 2e-07, |
| "loss": -0.0053185861557722095, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666731238365, |
| "reward_std": 0.2386302560567856, |
| "rewards/MultiModalAccuracyORM": 0.1666666731238365, |
| "step": 775, |
| "train_speed(iter/s)": 0.041176 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.35, |
| "epoch": 0.3151515151515151, |
| "grad_norm": 10.563432693481445, |
| "kl": 0.02420806884765625, |
| "learning_rate": 2e-07, |
| "loss": -0.005909685418009758, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333432674408, |
| "reward_std": 0.2135300010442734, |
| "rewards/MultiModalAccuracyORM": 0.10833333432674408, |
| "step": 780, |
| "train_speed(iter/s)": 0.041208 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.1, |
| "epoch": 0.31717171717171716, |
| "grad_norm": 5.078320503234863, |
| "kl": 0.026453018188476562, |
| "learning_rate": 2e-07, |
| "loss": 0.0009352466091513634, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666738688946, |
| "reward_std": 0.256683474779129, |
| "rewards/MultiModalAccuracyORM": 0.2666666738688946, |
| "step": 785, |
| "train_speed(iter/s)": 0.041229 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.35, |
| "epoch": 0.3191919191919192, |
| "grad_norm": 10.143798828125, |
| "kl": 0.03321533203125, |
| "learning_rate": 2e-07, |
| "loss": 0.012424397468566894, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000037252903, |
| "reward_std": 0.26597192585468293, |
| "rewards/MultiModalAccuracyORM": 0.2750000037252903, |
| "step": 790, |
| "train_speed(iter/s)": 0.041249 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 163.15, |
| "epoch": 0.3212121212121212, |
| "grad_norm": 0.5449197888374329, |
| "kl": 0.019189453125, |
| "learning_rate": 2e-07, |
| "loss": 0.030487871170043944, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333447575569, |
| "reward_std": 0.39207376539707184, |
| "rewards/MultiModalAccuracyORM": 0.2583333447575569, |
| "step": 795, |
| "train_speed(iter/s)": 0.041183 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.25, |
| "epoch": 0.32323232323232326, |
| "grad_norm": 1.004371166229248, |
| "kl": 0.037581253051757815, |
| "learning_rate": 2e-07, |
| "loss": 0.0017656445503234862, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666693985462, |
| "reward_std": 0.25270916521549225, |
| "rewards/MultiModalAccuracyORM": 0.2666666693985462, |
| "step": 800, |
| "train_speed(iter/s)": 0.041199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.95, |
| "epoch": 0.32525252525252524, |
| "grad_norm": 19.628896713256836, |
| "kl": 0.053558349609375, |
| "learning_rate": 2e-07, |
| "loss": -0.021615955233573913, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1666666693985462, |
| "reward_std": 0.32094223201274874, |
| "rewards/MultiModalAccuracyORM": 0.1666666693985462, |
| "step": 805, |
| "train_speed(iter/s)": 0.041213 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.7, |
| "epoch": 0.32727272727272727, |
| "grad_norm": 6.42383337020874, |
| "kl": 0.18563766479492189, |
| "learning_rate": 2e-07, |
| "loss": 0.033368897438049314, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.2744703501462936, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 810, |
| "train_speed(iter/s)": 0.041234 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.8, |
| "epoch": 0.3292929292929293, |
| "grad_norm": 3.2321925163269043, |
| "kl": 0.08846683502197265, |
| "learning_rate": 2e-07, |
| "loss": 0.003480428457260132, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000298023225, |
| "reward_std": 0.2953897833824158, |
| "rewards/MultiModalAccuracyORM": 0.15000000298023225, |
| "step": 815, |
| "train_speed(iter/s)": 0.041242 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.85, |
| "epoch": 0.33131313131313134, |
| "grad_norm": 5.854945659637451, |
| "kl": 0.011492156982421875, |
| "learning_rate": 2e-07, |
| "loss": -0.008568185567855834, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.3172461599111557, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 820, |
| "train_speed(iter/s)": 0.041263 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.25, |
| "epoch": 0.3333333333333333, |
| "grad_norm": 17.020723342895508, |
| "kl": 0.029691314697265624, |
| "learning_rate": 2e-07, |
| "loss": -0.010567378997802735, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.34933354556560514, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 825, |
| "train_speed(iter/s)": 0.041286 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.2, |
| "epoch": 0.33535353535353535, |
| "grad_norm": 12.575139999389648, |
| "kl": 0.0603668212890625, |
| "learning_rate": 2e-07, |
| "loss": -0.0004529397003352642, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333432674408, |
| "reward_std": 0.30291883945465087, |
| "rewards/MultiModalAccuracyORM": 0.3833333432674408, |
| "step": 830, |
| "train_speed(iter/s)": 0.041299 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.7, |
| "epoch": 0.3373737373737374, |
| "grad_norm": 2.0305564403533936, |
| "kl": 0.08530197143554688, |
| "learning_rate": 2e-07, |
| "loss": -0.0174559086561203, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333380520344, |
| "reward_std": 0.36567819118499756, |
| "rewards/MultiModalAccuracyORM": 0.3083333380520344, |
| "step": 835, |
| "train_speed(iter/s)": 0.041331 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 59.2, |
| "epoch": 0.3393939393939394, |
| "grad_norm": 6.523157119750977, |
| "kl": 0.098590087890625, |
| "learning_rate": 2e-07, |
| "loss": -0.014323845505714417, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.31820976436138154, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 840, |
| "train_speed(iter/s)": 0.04132 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 123.2, |
| "epoch": 0.3414141414141414, |
| "grad_norm": 4.560072422027588, |
| "kl": 0.010162353515625, |
| "learning_rate": 2e-07, |
| "loss": 0.02465280294418335, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000447034835, |
| "reward_std": 0.25512446761131286, |
| "rewards/MultiModalAccuracyORM": 0.22500000447034835, |
| "step": 845, |
| "train_speed(iter/s)": 0.041291 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 52.65, |
| "epoch": 0.3434343434343434, |
| "grad_norm": 0.2115914523601532, |
| "kl": 0.1222564697265625, |
| "learning_rate": 2e-07, |
| "loss": 0.01849503219127655, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1416666679084301, |
| "reward_std": 0.18255070447921753, |
| "rewards/MultiModalAccuracyORM": 0.1416666679084301, |
| "step": 850, |
| "train_speed(iter/s)": 0.041263 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.6, |
| "epoch": 0.34545454545454546, |
| "grad_norm": 8.007162094116211, |
| "kl": 0.06471099853515624, |
| "learning_rate": 2e-07, |
| "loss": -0.027201026678085327, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667386889455, |
| "reward_std": 0.27853985130786896, |
| "rewards/MultiModalAccuracyORM": 0.41666667386889455, |
| "step": 855, |
| "train_speed(iter/s)": 0.041287 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.55, |
| "epoch": 0.3474747474747475, |
| "grad_norm": 14.470208168029785, |
| "kl": 0.07525177001953125, |
| "learning_rate": 2e-07, |
| "loss": -0.01188465803861618, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.3921093553304672, |
| "rewards/MultiModalAccuracyORM": 0.33333334028720857, |
| "step": 860, |
| "train_speed(iter/s)": 0.041297 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.45, |
| "epoch": 0.34949494949494947, |
| "grad_norm": 11.233606338500977, |
| "kl": 0.10040740966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.02309779226779938, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000037252903, |
| "reward_std": 0.23634997606277466, |
| "rewards/MultiModalAccuracyORM": 0.3000000037252903, |
| "step": 865, |
| "train_speed(iter/s)": 0.041313 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 91.9, |
| "epoch": 0.3515151515151515, |
| "grad_norm": 22.588499069213867, |
| "kl": 0.11612701416015625, |
| "learning_rate": 2e-07, |
| "loss": 0.020076577365398408, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000298023225, |
| "reward_std": 0.26677650213241577, |
| "rewards/MultiModalAccuracyORM": 0.15000000298023225, |
| "step": 870, |
| "train_speed(iter/s)": 0.041279 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.75, |
| "epoch": 0.35353535353535354, |
| "grad_norm": 8.226666450500488, |
| "kl": 0.05343475341796875, |
| "learning_rate": 2e-07, |
| "loss": -0.014575448632240296, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333395421505, |
| "reward_std": 0.37199449837207793, |
| "rewards/MultiModalAccuracyORM": 0.3333333395421505, |
| "step": 875, |
| "train_speed(iter/s)": 0.041283 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 81.1, |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.5393237471580505, |
| "kl": 0.0892120361328125, |
| "learning_rate": 2e-07, |
| "loss": 0.006313225626945496, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333365619183, |
| "reward_std": 0.20661829113960267, |
| "rewards/MultiModalAccuracyORM": 0.2833333365619183, |
| "step": 880, |
| "train_speed(iter/s)": 0.041271 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.1, |
| "epoch": 0.3575757575757576, |
| "grad_norm": 0.05410289764404297, |
| "kl": 0.015875244140625, |
| "learning_rate": 2e-07, |
| "loss": 0.0006564079783856868, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333432674407, |
| "reward_std": 0.14888326525688172, |
| "rewards/MultiModalAccuracyORM": 0.28333333432674407, |
| "step": 885, |
| "train_speed(iter/s)": 0.04124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 51.8, |
| "epoch": 0.3595959595959596, |
| "grad_norm": 16.54722785949707, |
| "kl": 0.0899993896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.010663460195064544, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333410322666, |
| "reward_std": 0.2636228919029236, |
| "rewards/MultiModalAccuracyORM": 0.3083333410322666, |
| "step": 890, |
| "train_speed(iter/s)": 0.041246 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.9, |
| "epoch": 0.3616161616161616, |
| "grad_norm": 10.844444274902344, |
| "kl": 0.194525146484375, |
| "learning_rate": 2e-07, |
| "loss": -0.04198589324951172, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4833333432674408, |
| "reward_std": 0.36594696044921876, |
| "rewards/MultiModalAccuracyORM": 0.4833333432674408, |
| "step": 895, |
| "train_speed(iter/s)": 0.04126 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 30.25, |
| "epoch": 0.36363636363636365, |
| "grad_norm": 5.428062915802002, |
| "kl": 0.0639495849609375, |
| "learning_rate": 2e-07, |
| "loss": -0.029673090577125548, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667237877846, |
| "reward_std": 0.2916341096162796, |
| "rewards/MultiModalAccuracyORM": 0.21666667237877846, |
| "step": 900, |
| "train_speed(iter/s)": 0.041259 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.9, |
| "epoch": 0.3656565656565657, |
| "grad_norm": 0.12221446633338928, |
| "kl": 0.07857627868652343, |
| "learning_rate": 2e-07, |
| "loss": -0.016133570671081544, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000447034837, |
| "reward_std": 0.2135299950838089, |
| "rewards/MultiModalAccuracyORM": 0.37500000447034837, |
| "step": 905, |
| "train_speed(iter/s)": 0.041282 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 4.65, |
| "epoch": 0.36767676767676766, |
| "grad_norm": 28.893342971801758, |
| "kl": 0.09071540832519531, |
| "learning_rate": 2e-07, |
| "loss": 0.006183768063783646, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333358168602, |
| "reward_std": 0.23309451341629028, |
| "rewards/MultiModalAccuracyORM": 0.3583333358168602, |
| "step": 910, |
| "train_speed(iter/s)": 0.041302 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.4, |
| "epoch": 0.3696969696969697, |
| "grad_norm": 0.04850845783948898, |
| "kl": 0.03702239990234375, |
| "learning_rate": 2e-07, |
| "loss": 0.031198829412460327, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333380520344, |
| "reward_std": 0.2325587123632431, |
| "rewards/MultiModalAccuracyORM": 0.3083333380520344, |
| "step": 915, |
| "train_speed(iter/s)": 0.041316 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 88.5, |
| "epoch": 0.3717171717171717, |
| "grad_norm": 6.006438732147217, |
| "kl": 0.09772415161132812, |
| "learning_rate": 2e-07, |
| "loss": 0.02726798951625824, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333417773247, |
| "reward_std": 0.3043610692024231, |
| "rewards/MultiModalAccuracyORM": 0.3083333417773247, |
| "step": 920, |
| "train_speed(iter/s)": 0.041277 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 35.3, |
| "epoch": 0.37373737373737376, |
| "grad_norm": 1.342499852180481, |
| "kl": 0.011273193359375, |
| "learning_rate": 2e-07, |
| "loss": 0.00134199857711792, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.15000000074505807, |
| "reward_std": 0.13182924091815948, |
| "rewards/MultiModalAccuracyORM": 0.15000000074505807, |
| "step": 925, |
| "train_speed(iter/s)": 0.041269 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.45, |
| "epoch": 0.37575757575757573, |
| "grad_norm": 3.2022011280059814, |
| "kl": 0.165765380859375, |
| "learning_rate": 2e-07, |
| "loss": -0.004855489730834961, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333879709245, |
| "reward_std": 0.33156771659851075, |
| "rewards/MultiModalAccuracyORM": 0.35833333879709245, |
| "step": 930, |
| "train_speed(iter/s)": 0.041287 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 50.05, |
| "epoch": 0.37777777777777777, |
| "grad_norm": 0.07847103476524353, |
| "kl": 0.05077667236328125, |
| "learning_rate": 2e-07, |
| "loss": -0.023166632652282713, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666716337204, |
| "reward_std": 0.32526595890522003, |
| "rewards/MultiModalAccuracyORM": 0.4166666716337204, |
| "step": 935, |
| "train_speed(iter/s)": 0.041275 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.15, |
| "epoch": 0.3797979797979798, |
| "grad_norm": 18.610437393188477, |
| "kl": 0.03169517517089844, |
| "learning_rate": 2e-07, |
| "loss": 0.024595724046230318, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333334028720855, |
| "reward_std": 0.3267322063446045, |
| "rewards/MultiModalAccuracyORM": 0.18333334028720855, |
| "step": 940, |
| "train_speed(iter/s)": 0.041277 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.45, |
| "epoch": 0.38181818181818183, |
| "grad_norm": 5.941343784332275, |
| "kl": 0.0661346435546875, |
| "learning_rate": 2e-07, |
| "loss": 0.024455997347831725, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333387970924, |
| "reward_std": 0.21973656117916107, |
| "rewards/MultiModalAccuracyORM": 0.2583333387970924, |
| "step": 945, |
| "train_speed(iter/s)": 0.04129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 37.05, |
| "epoch": 0.3838383838383838, |
| "grad_norm": 24.896520614624023, |
| "kl": 0.150213623046875, |
| "learning_rate": 2e-07, |
| "loss": 0.017214223742485046, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667312383654, |
| "reward_std": 0.33557761609554293, |
| "rewards/MultiModalAccuracyORM": 0.41666667312383654, |
| "step": 950, |
| "train_speed(iter/s)": 0.041288 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.5, |
| "epoch": 0.38585858585858585, |
| "grad_norm": 8.904081344604492, |
| "kl": 0.12316970825195313, |
| "learning_rate": 2e-07, |
| "loss": -0.0002661585807800293, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000022351742, |
| "reward_std": 0.21999078691005708, |
| "rewards/MultiModalAccuracyORM": 0.3000000022351742, |
| "step": 955, |
| "train_speed(iter/s)": 0.041304 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.05, |
| "epoch": 0.3878787878787879, |
| "grad_norm": 0.30000391602516174, |
| "kl": 0.193408203125, |
| "learning_rate": 2e-07, |
| "loss": -0.016391244530677796, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5916666716337204, |
| "reward_std": 0.15219832956790924, |
| "rewards/MultiModalAccuracyORM": 0.5916666716337204, |
| "step": 960, |
| "train_speed(iter/s)": 0.041312 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.6, |
| "epoch": 0.3898989898989899, |
| "grad_norm": 1.9883811473846436, |
| "kl": 0.046465301513671876, |
| "learning_rate": 2e-07, |
| "loss": -0.0011612892150878907, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667014360427, |
| "reward_std": 0.23930107951164245, |
| "rewards/MultiModalAccuracyORM": 0.24166667014360427, |
| "step": 965, |
| "train_speed(iter/s)": 0.041313 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.4, |
| "epoch": 0.39191919191919194, |
| "grad_norm": 17.314956665039062, |
| "kl": 0.10909576416015625, |
| "learning_rate": 2e-07, |
| "loss": 0.003603992611169815, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666666865348815, |
| "reward_std": 0.26976318955421447, |
| "rewards/MultiModalAccuracyORM": 0.26666666865348815, |
| "step": 970, |
| "train_speed(iter/s)": 0.04131 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.45, |
| "epoch": 0.3939393939393939, |
| "grad_norm": 2.700242042541504, |
| "kl": 0.0446197509765625, |
| "learning_rate": 2e-07, |
| "loss": -0.024584516882896423, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.09166666865348816, |
| "reward_std": 0.23854664266109465, |
| "rewards/MultiModalAccuracyORM": 0.09166666865348816, |
| "step": 975, |
| "train_speed(iter/s)": 0.041305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.85, |
| "epoch": 0.39595959595959596, |
| "grad_norm": 1.759245753288269, |
| "kl": 0.0932861328125, |
| "learning_rate": 2e-07, |
| "loss": 0.03299914002418518, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334028720856, |
| "reward_std": 0.22785155177116395, |
| "rewards/MultiModalAccuracyORM": 0.23333334028720856, |
| "step": 980, |
| "train_speed(iter/s)": 0.041315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.7, |
| "epoch": 0.397979797979798, |
| "grad_norm": 12.485607147216797, |
| "kl": 0.061135292053222656, |
| "learning_rate": 2e-07, |
| "loss": 0.022333118319511413, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667088866236, |
| "reward_std": 0.27903059422969817, |
| "rewards/MultiModalAccuracyORM": 0.26666667088866236, |
| "step": 985, |
| "train_speed(iter/s)": 0.041318 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 48.65, |
| "epoch": 0.4, |
| "grad_norm": 4.170945644378662, |
| "kl": 0.0902923583984375, |
| "learning_rate": 2e-07, |
| "loss": -0.00014310678234323858, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.2511145681142807, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 990, |
| "train_speed(iter/s)": 0.041309 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.25, |
| "epoch": 0.402020202020202, |
| "grad_norm": 2.5125696659088135, |
| "kl": 0.12824859619140624, |
| "learning_rate": 2e-07, |
| "loss": 0.0361581027507782, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5250000052154065, |
| "reward_std": 0.2526735752820969, |
| "rewards/MultiModalAccuracyORM": 0.5250000052154065, |
| "step": 995, |
| "train_speed(iter/s)": 0.041331 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 24.84500503540039, |
| "learning_rate": 2e-07, |
| "loss": -0.03532302379608154, |
| "memory(GiB)": 104.49, |
| "step": 1000, |
| "train_speed(iter/s)": 0.041234 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 40.71333456993103, |
| "eval_kl": 0.09849456787109374, |
| "eval_loss": 0.019675862044095993, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.36166667461395263, |
| "eval_reward_std": 0.2775319296121597, |
| "eval_rewards/MultiModalAccuracyORM": 0.36166667461395263, |
| "eval_runtime": 294.4392, |
| "eval_samples_per_second": 0.17, |
| "eval_steps_per_second": 0.017, |
| "step": 1000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 63.525, |
| "epoch": 0.40606060606060607, |
| "grad_norm": 2.5043818950653076, |
| "kl": 0.041501617431640624, |
| "learning_rate": 2e-07, |
| "loss": -0.008308599889278411, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.21963488459587097, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 1005, |
| "train_speed(iter/s)": 0.040624 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.05, |
| "epoch": 0.4080808080808081, |
| "grad_norm": 19.067171096801758, |
| "kl": 0.07535552978515625, |
| "learning_rate": 2e-07, |
| "loss": 0.017892301082611084, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.22005038857460021, |
| "rewards/MultiModalAccuracyORM": 0.33333334028720857, |
| "step": 1010, |
| "train_speed(iter/s)": 0.040645 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 70.6, |
| "epoch": 0.4101010101010101, |
| "grad_norm": 2.5989065170288086, |
| "kl": 0.0229217529296875, |
| "learning_rate": 2e-07, |
| "loss": 0.040188026428222653, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.25591449439525604, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 1015, |
| "train_speed(iter/s)": 0.040633 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 58.65, |
| "epoch": 0.4121212121212121, |
| "grad_norm": 11.748002052307129, |
| "kl": 0.06688776016235351, |
| "learning_rate": 2e-07, |
| "loss": -0.0008021335117518902, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666666939854622, |
| "reward_std": 0.17150862216949464, |
| "rewards/MultiModalAccuracyORM": 0.11666666939854622, |
| "step": 1020, |
| "train_speed(iter/s)": 0.040631 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.15, |
| "epoch": 0.41414141414141414, |
| "grad_norm": 0.12045960873365402, |
| "kl": 0.03296966552734375, |
| "learning_rate": 2e-07, |
| "loss": -0.010370378196239472, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.11666667088866234, |
| "reward_std": 0.17081378698348998, |
| "rewards/MultiModalAccuracyORM": 0.11666667088866234, |
| "step": 1025, |
| "train_speed(iter/s)": 0.040649 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.85, |
| "epoch": 0.4161616161616162, |
| "grad_norm": 1.6403871774673462, |
| "kl": 0.06666259765625, |
| "learning_rate": 2e-07, |
| "loss": 0.00585133358836174, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000074505806, |
| "reward_std": 0.17705594301223754, |
| "rewards/MultiModalAccuracyORM": 0.17500000074505806, |
| "step": 1030, |
| "train_speed(iter/s)": 0.040624 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.3, |
| "epoch": 0.41818181818181815, |
| "grad_norm": 0.014441369101405144, |
| "kl": 0.07417640686035157, |
| "learning_rate": 2e-07, |
| "loss": -0.010604190826416015, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000298023223, |
| "reward_std": 0.2003761351108551, |
| "rewards/MultiModalAccuracyORM": 0.22500000298023223, |
| "step": 1035, |
| "train_speed(iter/s)": 0.040636 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.6, |
| "epoch": 0.4202020202020202, |
| "grad_norm": 6.607668399810791, |
| "kl": 0.1425227165222168, |
| "learning_rate": 2e-07, |
| "loss": 0.02794753313064575, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333395421505, |
| "reward_std": 0.2567190647125244, |
| "rewards/MultiModalAccuracyORM": 0.3583333395421505, |
| "step": 1040, |
| "train_speed(iter/s)": 0.040638 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.6, |
| "epoch": 0.4222222222222222, |
| "grad_norm": 0.8122760057449341, |
| "kl": 0.1528533935546875, |
| "learning_rate": 2e-07, |
| "loss": 0.019382116198539735, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000670552254, |
| "reward_std": 0.2034369796514511, |
| "rewards/MultiModalAccuracyORM": 0.17500000670552254, |
| "step": 1045, |
| "train_speed(iter/s)": 0.040661 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.85, |
| "epoch": 0.42424242424242425, |
| "grad_norm": 0.18659576773643494, |
| "kl": 0.010857391357421874, |
| "learning_rate": 2e-07, |
| "loss": 0.015965181589126586, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.1660114347934723, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 1050, |
| "train_speed(iter/s)": 0.040661 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.25, |
| "epoch": 0.4262626262626263, |
| "grad_norm": 0.4390380382537842, |
| "kl": 0.07591552734375, |
| "learning_rate": 2e-07, |
| "loss": 0.011004485189914703, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4250000111758709, |
| "reward_std": 0.24862808585166932, |
| "rewards/MultiModalAccuracyORM": 0.4250000111758709, |
| "step": 1055, |
| "train_speed(iter/s)": 0.040671 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 52.95, |
| "epoch": 0.42828282828282827, |
| "grad_norm": 0.3618135452270508, |
| "kl": 0.109490966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.011407237499952316, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333387970924, |
| "reward_std": 0.14589657187461852, |
| "rewards/MultiModalAccuracyORM": 0.3333333387970924, |
| "step": 1060, |
| "train_speed(iter/s)": 0.04069 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.1, |
| "epoch": 0.4303030303030303, |
| "grad_norm": 13.074536323547363, |
| "kl": 0.18959503173828124, |
| "learning_rate": 2e-07, |
| "loss": 0.04986717700958252, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000029802322, |
| "reward_std": 0.14589657187461852, |
| "rewards/MultiModalAccuracyORM": 0.3000000029802322, |
| "step": 1065, |
| "train_speed(iter/s)": 0.040694 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 28.9, |
| "epoch": 0.43232323232323233, |
| "grad_norm": 6.16197395324707, |
| "kl": 0.15793075561523437, |
| "learning_rate": 2e-07, |
| "loss": 0.06019207835197449, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334177732467, |
| "reward_std": 0.2669951319694519, |
| "rewards/MultiModalAccuracyORM": 0.28333334177732467, |
| "step": 1070, |
| "train_speed(iter/s)": 0.040701 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 27.05, |
| "epoch": 0.43434343434343436, |
| "grad_norm": 25.265649795532227, |
| "kl": 0.08460769653320313, |
| "learning_rate": 2e-07, |
| "loss": -0.04109536409378052, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3750000037252903, |
| "reward_std": 0.2325587123632431, |
| "rewards/MultiModalAccuracyORM": 0.3750000037252903, |
| "step": 1075, |
| "train_speed(iter/s)": 0.040703 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.0, |
| "epoch": 0.43636363636363634, |
| "grad_norm": 2.5213825702667236, |
| "kl": 0.089501953125, |
| "learning_rate": 2e-07, |
| "loss": 0.011518492549657821, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3333333387970924, |
| "reward_std": 0.35792474150657655, |
| "rewards/MultiModalAccuracyORM": 0.3333333387970924, |
| "step": 1080, |
| "train_speed(iter/s)": 0.040705 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 65.85, |
| "epoch": 0.4383838383838384, |
| "grad_norm": 2.2053442001342773, |
| "kl": 0.014685440063476562, |
| "learning_rate": 2e-07, |
| "loss": -0.03693766593933105, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666701436043, |
| "reward_std": 0.2370448112487793, |
| "rewards/MultiModalAccuracyORM": 0.2666666701436043, |
| "step": 1085, |
| "train_speed(iter/s)": 0.040693 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.2, |
| "epoch": 0.4404040404040404, |
| "grad_norm": 12.156472206115723, |
| "kl": 0.17877197265625, |
| "learning_rate": 2e-07, |
| "loss": 0.032665693759918214, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666746139526, |
| "reward_std": 0.27148365676403047, |
| "rewards/MultiModalAccuracyORM": 0.3416666746139526, |
| "step": 1090, |
| "train_speed(iter/s)": 0.040713 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.85, |
| "epoch": 0.44242424242424244, |
| "grad_norm": 1.4023343324661255, |
| "kl": 0.098193359375, |
| "learning_rate": 2e-07, |
| "loss": -0.007838453352451324, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000022351742, |
| "reward_std": 0.172567418217659, |
| "rewards/MultiModalAccuracyORM": 0.4000000022351742, |
| "step": 1095, |
| "train_speed(iter/s)": 0.040737 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 67.6, |
| "epoch": 0.4444444444444444, |
| "grad_norm": 10.351971626281738, |
| "kl": 0.02147979736328125, |
| "learning_rate": 2e-07, |
| "loss": 0.03331095576286316, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666775941849, |
| "reward_std": 0.3504018098115921, |
| "rewards/MultiModalAccuracyORM": 0.3166666775941849, |
| "step": 1100, |
| "train_speed(iter/s)": 0.04073 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.95, |
| "epoch": 0.44646464646464645, |
| "grad_norm": 13.833907127380371, |
| "kl": 0.019232177734375, |
| "learning_rate": 2e-07, |
| "loss": -0.005460131168365479, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000089406967, |
| "reward_std": 0.2667409062385559, |
| "rewards/MultiModalAccuracyORM": 0.3250000089406967, |
| "step": 1105, |
| "train_speed(iter/s)": 0.040741 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.65, |
| "epoch": 0.4484848484848485, |
| "grad_norm": 2.0316038131713867, |
| "kl": 0.018201828002929688, |
| "learning_rate": 2e-07, |
| "loss": -0.0024514278396964074, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333879709245, |
| "reward_std": 0.2793444275856018, |
| "rewards/MultiModalAccuracyORM": 0.35833333879709245, |
| "step": 1110, |
| "train_speed(iter/s)": 0.04076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.0, |
| "epoch": 0.4505050505050505, |
| "grad_norm": 15.886459350585938, |
| "kl": 0.21325912475585937, |
| "learning_rate": 2e-07, |
| "loss": 0.0038191914558410645, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166666865348815, |
| "reward_std": 0.2245364874601364, |
| "rewards/MultiModalAccuracyORM": 0.24166666865348815, |
| "step": 1115, |
| "train_speed(iter/s)": 0.040791 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.4, |
| "epoch": 0.45252525252525255, |
| "grad_norm": 0.03295298293232918, |
| "kl": 0.1110443115234375, |
| "learning_rate": 2e-07, |
| "loss": 0.013870391249656677, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.27402731478214265, |
| "rewards/MultiModalAccuracyORM": 0.21666667088866234, |
| "step": 1120, |
| "train_speed(iter/s)": 0.040796 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 27.9, |
| "epoch": 0.45454545454545453, |
| "grad_norm": 2.8173696994781494, |
| "kl": 0.0269622802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.03692147135734558, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333387970924, |
| "reward_std": 0.2159452974796295, |
| "rewards/MultiModalAccuracyORM": 0.3833333387970924, |
| "step": 1125, |
| "train_speed(iter/s)": 0.04082 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.45, |
| "epoch": 0.45656565656565656, |
| "grad_norm": 0.10465247184038162, |
| "kl": 0.04431991577148438, |
| "learning_rate": 2e-07, |
| "loss": 0.003530232235789299, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666701436043, |
| "reward_std": 0.2323044866323471, |
| "rewards/MultiModalAccuracyORM": 0.3166666701436043, |
| "step": 1130, |
| "train_speed(iter/s)": 0.040817 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.6, |
| "epoch": 0.4585858585858586, |
| "grad_norm": 0.32010194659233093, |
| "kl": 0.094537353515625, |
| "learning_rate": 2e-07, |
| "loss": 0.012909208238124848, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.0916666716337204, |
| "reward_std": 0.1293427586555481, |
| "rewards/MultiModalAccuracyORM": 0.0916666716337204, |
| "step": 1135, |
| "train_speed(iter/s)": 0.040832 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 67.8, |
| "epoch": 0.46060606060606063, |
| "grad_norm": 15.148902893066406, |
| "kl": 0.07255020141601562, |
| "learning_rate": 2e-07, |
| "loss": 0.016760605573654174, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000149011612, |
| "reward_std": 0.2260383188724518, |
| "rewards/MultiModalAccuracyORM": 0.20000000149011612, |
| "step": 1140, |
| "train_speed(iter/s)": 0.040831 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 54.75, |
| "epoch": 0.4626262626262626, |
| "grad_norm": 4.259115219116211, |
| "kl": 0.012025833129882812, |
| "learning_rate": 2e-07, |
| "loss": -0.004991362616419792, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000298023223, |
| "reward_std": 0.2003761351108551, |
| "rewards/MultiModalAccuracyORM": 0.12500000298023223, |
| "step": 1145, |
| "train_speed(iter/s)": 0.040832 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.7, |
| "epoch": 0.46464646464646464, |
| "grad_norm": 4.517999649047852, |
| "kl": 0.0364471435546875, |
| "learning_rate": 2e-07, |
| "loss": 0.0014625540003180503, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000149011614, |
| "reward_std": 0.18561154305934907, |
| "rewards/MultiModalAccuracyORM": 0.37500000149011614, |
| "step": 1150, |
| "train_speed(iter/s)": 0.040853 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.1, |
| "epoch": 0.4666666666666667, |
| "grad_norm": 9.037857055664062, |
| "kl": 0.066754150390625, |
| "learning_rate": 2e-07, |
| "loss": 0.023162148892879486, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000029802322, |
| "reward_std": 0.14589657187461852, |
| "rewards/MultiModalAccuracyORM": 0.3000000029802322, |
| "step": 1155, |
| "train_speed(iter/s)": 0.040895 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.7, |
| "epoch": 0.4686868686868687, |
| "grad_norm": 0.35684671998023987, |
| "kl": 0.1403411865234375, |
| "learning_rate": 2e-07, |
| "loss": 0.011607617139816284, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.47500001043081286, |
| "reward_std": 0.19340355396270753, |
| "rewards/MultiModalAccuracyORM": 0.47500001043081286, |
| "step": 1160, |
| "train_speed(iter/s)": 0.040919 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.4, |
| "epoch": 0.4707070707070707, |
| "grad_norm": 0.18109376728534698, |
| "kl": 0.0370758056640625, |
| "learning_rate": 2e-07, |
| "loss": -0.0030417680740356446, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.1848811239004135, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 1165, |
| "train_speed(iter/s)": 0.040938 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.8, |
| "epoch": 0.4727272727272727, |
| "grad_norm": 17.05179786682129, |
| "kl": 0.027799224853515624, |
| "learning_rate": 2e-07, |
| "loss": -0.01608174741268158, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333656191824, |
| "reward_std": 0.25566026866436004, |
| "rewards/MultiModalAccuracyORM": 0.35833333656191824, |
| "step": 1170, |
| "train_speed(iter/s)": 0.040961 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 35.55, |
| "epoch": 0.47474747474747475, |
| "grad_norm": 2.053295850753784, |
| "kl": 0.0653228759765625, |
| "learning_rate": 2e-07, |
| "loss": 0.0025410931557416916, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000149011613, |
| "reward_std": 0.18780820965766906, |
| "rewards/MultiModalAccuracyORM": 0.30000000149011613, |
| "step": 1175, |
| "train_speed(iter/s)": 0.040966 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 3.65, |
| "epoch": 0.4767676767676768, |
| "grad_norm": 12.327520370483398, |
| "kl": 0.1503997802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.00606456995010376, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.47500000298023226, |
| "reward_std": 0.16696292161941528, |
| "rewards/MultiModalAccuracyORM": 0.47500000298023226, |
| "step": 1180, |
| "train_speed(iter/s)": 0.040998 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.1, |
| "epoch": 0.47878787878787876, |
| "grad_norm": 0.1990954726934433, |
| "kl": 0.26718597412109374, |
| "learning_rate": 2e-07, |
| "loss": 0.011653450131416321, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333333656191826, |
| "reward_std": 0.27402731478214265, |
| "rewards/MultiModalAccuracyORM": 0.38333333656191826, |
| "step": 1185, |
| "train_speed(iter/s)": 0.041009 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.5, |
| "epoch": 0.4808080808080808, |
| "grad_norm": 5.806619644165039, |
| "kl": 0.059732818603515626, |
| "learning_rate": 2e-07, |
| "loss": -0.013705405592918395, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.09166667088866234, |
| "reward_std": 0.12558708488941192, |
| "rewards/MultiModalAccuracyORM": 0.09166667088866234, |
| "step": 1190, |
| "train_speed(iter/s)": 0.041032 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.2, |
| "epoch": 0.48282828282828283, |
| "grad_norm": 12.781750679016113, |
| "kl": 0.04134521484375, |
| "learning_rate": 2e-07, |
| "loss": -0.008668276667594909, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666693985462, |
| "reward_std": 0.2597057580947876, |
| "rewards/MultiModalAccuracyORM": 0.4416666693985462, |
| "step": 1195, |
| "train_speed(iter/s)": 0.041043 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.45, |
| "epoch": 0.48484848484848486, |
| "grad_norm": 3.4121592044830322, |
| "kl": 0.073028564453125, |
| "learning_rate": 2e-07, |
| "loss": -0.0033960781991481783, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333447575569, |
| "reward_std": 0.38452682793140414, |
| "rewards/MultiModalAccuracyORM": 0.2833333447575569, |
| "step": 1200, |
| "train_speed(iter/s)": 0.041068 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.65, |
| "epoch": 0.4868686868686869, |
| "grad_norm": 2.179175615310669, |
| "kl": 0.1186309814453125, |
| "learning_rate": 2e-07, |
| "loss": 0.0020799320191144943, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5500000081956387, |
| "reward_std": 0.383985635638237, |
| "rewards/MultiModalAccuracyORM": 0.5500000081956387, |
| "step": 1205, |
| "train_speed(iter/s)": 0.041076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.0, |
| "epoch": 0.4888888888888889, |
| "grad_norm": 16.699316024780273, |
| "kl": 0.19964828491210937, |
| "learning_rate": 2e-07, |
| "loss": 0.07210339307785034, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667014360426, |
| "reward_std": 0.27151924669742583, |
| "rewards/MultiModalAccuracyORM": 0.41666667014360426, |
| "step": 1210, |
| "train_speed(iter/s)": 0.041084 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.2, |
| "epoch": 0.4909090909090909, |
| "grad_norm": 11.2245512008667, |
| "kl": 0.02044839859008789, |
| "learning_rate": 2e-07, |
| "loss": 0.0006846427917480469, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666738688946, |
| "reward_std": 0.22074522376060485, |
| "rewards/MultiModalAccuracyORM": 0.3166666738688946, |
| "step": 1215, |
| "train_speed(iter/s)": 0.041094 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.95, |
| "epoch": 0.49292929292929294, |
| "grad_norm": 23.733837127685547, |
| "kl": 0.0533355712890625, |
| "learning_rate": 2e-07, |
| "loss": -0.03312296569347382, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666666939854623, |
| "reward_std": 0.3827823489904404, |
| "rewards/MultiModalAccuracyORM": 0.21666666939854623, |
| "step": 1220, |
| "train_speed(iter/s)": 0.041103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.4, |
| "epoch": 0.494949494949495, |
| "grad_norm": 5.569579124450684, |
| "kl": 0.12704048156738282, |
| "learning_rate": 2e-07, |
| "loss": -0.030297344923019408, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667386889455, |
| "reward_std": 0.3534030467271805, |
| "rewards/MultiModalAccuracyORM": 0.41666667386889455, |
| "step": 1225, |
| "train_speed(iter/s)": 0.041105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.15, |
| "epoch": 0.49696969696969695, |
| "grad_norm": 13.687773704528809, |
| "kl": 0.054621124267578126, |
| "learning_rate": 2e-07, |
| "loss": 0.020814248919487, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.2981545031070709, |
| "rewards/MultiModalAccuracyORM": 0.29166667312383654, |
| "step": 1230, |
| "train_speed(iter/s)": 0.041117 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.45, |
| "epoch": 0.498989898989899, |
| "grad_norm": 4.014401912689209, |
| "kl": 0.11805038452148438, |
| "learning_rate": 2e-07, |
| "loss": -0.014261078834533692, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666666865348814, |
| "reward_std": 0.24615318179130555, |
| "rewards/MultiModalAccuracyORM": 0.31666666865348814, |
| "step": 1235, |
| "train_speed(iter/s)": 0.041139 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 57.15, |
| "epoch": 0.501010101010101, |
| "grad_norm": 7.063708782196045, |
| "kl": 0.04602813720703125, |
| "learning_rate": 2e-07, |
| "loss": -0.0014480194076895714, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000052154064, |
| "reward_std": 0.2486636757850647, |
| "rewards/MultiModalAccuracyORM": 0.3500000052154064, |
| "step": 1240, |
| "train_speed(iter/s)": 0.041135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.65, |
| "epoch": 0.503030303030303, |
| "grad_norm": 0.07285178452730179, |
| "kl": 0.06838836669921874, |
| "learning_rate": 2e-07, |
| "loss": 0.007464568316936493, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.26703072190284727, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 1245, |
| "train_speed(iter/s)": 0.04113 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 16.691085815429688, |
| "learning_rate": 2e-07, |
| "loss": 0.027106884121894836, |
| "memory(GiB)": 104.49, |
| "step": 1250, |
| "train_speed(iter/s)": 0.041132 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 24.193333625793457, |
| "eval_kl": 0.0990032958984375, |
| "eval_loss": 0.013061273843050003, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.3783333380520344, |
| "eval_reward_std": 0.21932941377162934, |
| "eval_rewards/MultiModalAccuracyORM": 0.3783333380520344, |
| "eval_runtime": 254.2733, |
| "eval_samples_per_second": 0.197, |
| "eval_steps_per_second": 0.02, |
| "step": 1250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.45, |
| "epoch": 0.5070707070707071, |
| "grad_norm": 1.7288111448287964, |
| "kl": 0.14322261810302733, |
| "learning_rate": 2e-07, |
| "loss": -0.0040175896137952805, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32916667200624944, |
| "reward_std": 0.21599168032407762, |
| "rewards/MultiModalAccuracyORM": 0.32916667200624944, |
| "step": 1255, |
| "train_speed(iter/s)": 0.040698 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.8, |
| "epoch": 0.509090909090909, |
| "grad_norm": 30.862096786499023, |
| "kl": 0.065618896484375, |
| "learning_rate": 2e-07, |
| "loss": 0.03462098240852356, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666753590107, |
| "reward_std": 0.3471368789672852, |
| "rewards/MultiModalAccuracyORM": 0.3166666753590107, |
| "step": 1260, |
| "train_speed(iter/s)": 0.040722 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 56.65, |
| "epoch": 0.5111111111111111, |
| "grad_norm": 18.206647872924805, |
| "kl": 0.050946044921875, |
| "learning_rate": 2e-07, |
| "loss": -0.018359455466270446, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500001192092897, |
| "reward_std": 0.285042542219162, |
| "rewards/MultiModalAccuracyORM": 0.37500001192092897, |
| "step": 1265, |
| "train_speed(iter/s)": 0.040714 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.8, |
| "epoch": 0.5131313131313131, |
| "grad_norm": 21.11511993408203, |
| "kl": 0.08178558349609374, |
| "learning_rate": 2e-07, |
| "loss": 0.019801269471645355, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5000000059604645, |
| "reward_std": 0.24666163325309753, |
| "rewards/MultiModalAccuracyORM": 0.5000000059604645, |
| "step": 1270, |
| "train_speed(iter/s)": 0.040716 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.7, |
| "epoch": 0.5151515151515151, |
| "grad_norm": 2.3435275554656982, |
| "kl": 0.037060546875, |
| "learning_rate": 2e-07, |
| "loss": -0.044399937987327574, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000149011613, |
| "reward_std": 0.18780821561813354, |
| "rewards/MultiModalAccuracyORM": 0.30000000149011613, |
| "step": 1275, |
| "train_speed(iter/s)": 0.040729 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.3, |
| "epoch": 0.5171717171717172, |
| "grad_norm": 6.154475688934326, |
| "kl": 0.06382598876953124, |
| "learning_rate": 2e-07, |
| "loss": 0.024791686236858367, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.38333334103226663, |
| "reward_std": 0.3026406019926071, |
| "rewards/MultiModalAccuracyORM": 0.38333334103226663, |
| "step": 1280, |
| "train_speed(iter/s)": 0.040736 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 72.85, |
| "epoch": 0.5191919191919192, |
| "grad_norm": 0.17857688665390015, |
| "kl": 0.05196533203125, |
| "learning_rate": 2e-07, |
| "loss": -0.01656932532787323, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.32905964851379393, |
| "rewards/MultiModalAccuracyORM": 0.1916666716337204, |
| "step": 1285, |
| "train_speed(iter/s)": 0.040739 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.1, |
| "epoch": 0.5212121212121212, |
| "grad_norm": 5.7444353103637695, |
| "kl": 0.032296371459960935, |
| "learning_rate": 2e-07, |
| "loss": -0.04405757784843445, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667237877846, |
| "reward_std": 0.3292782843112946, |
| "rewards/MultiModalAccuracyORM": 0.19166667237877846, |
| "step": 1290, |
| "train_speed(iter/s)": 0.04075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.45, |
| "epoch": 0.5232323232323233, |
| "grad_norm": 1.938860297203064, |
| "kl": 0.04727783203125, |
| "learning_rate": 2e-07, |
| "loss": 0.001994212530553341, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4666666701436043, |
| "reward_std": 0.2953156381845474, |
| "rewards/MultiModalAccuracyORM": 0.4666666701436043, |
| "step": 1295, |
| "train_speed(iter/s)": 0.040768 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.9, |
| "epoch": 0.5252525252525253, |
| "grad_norm": 23.327890396118164, |
| "kl": 0.118865966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.020175328850746153, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333492279053, |
| "reward_std": 0.3596546709537506, |
| "rewards/MultiModalAccuracyORM": 0.3833333492279053, |
| "step": 1300, |
| "train_speed(iter/s)": 0.04078 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.4, |
| "epoch": 0.5272727272727272, |
| "grad_norm": 1.2604830265045166, |
| "kl": 0.082135009765625, |
| "learning_rate": 2e-07, |
| "loss": -0.006745982170104981, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000089406967, |
| "reward_std": 0.23631438612937927, |
| "rewards/MultiModalAccuracyORM": 0.3250000089406967, |
| "step": 1305, |
| "train_speed(iter/s)": 0.040788 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.1, |
| "epoch": 0.5292929292929293, |
| "grad_norm": 19.63453483581543, |
| "kl": 0.093505859375, |
| "learning_rate": 2e-07, |
| "loss": -0.01361556351184845, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334177732465, |
| "reward_std": 0.24337058067321776, |
| "rewards/MultiModalAccuracyORM": 0.33333334177732465, |
| "step": 1310, |
| "train_speed(iter/s)": 0.0408 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 64.75, |
| "epoch": 0.5313131313131313, |
| "grad_norm": 5.953737735748291, |
| "kl": 0.115643310546875, |
| "learning_rate": 2e-07, |
| "loss": 0.004205666109919548, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833334028720855, |
| "reward_std": 0.21123813688755036, |
| "rewards/MultiModalAccuracyORM": 0.30833334028720855, |
| "step": 1315, |
| "train_speed(iter/s)": 0.040801 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.55, |
| "epoch": 0.5333333333333333, |
| "grad_norm": 24.937227249145508, |
| "kl": 0.1268402099609375, |
| "learning_rate": 2e-07, |
| "loss": 0.0015925129875540734, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.26666667237877845, |
| "reward_std": 0.31119862794876096, |
| "rewards/MultiModalAccuracyORM": 0.26666667237877845, |
| "step": 1320, |
| "train_speed(iter/s)": 0.040816 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.35, |
| "epoch": 0.5353535353535354, |
| "grad_norm": 0.8153337240219116, |
| "kl": 0.150848388671875, |
| "learning_rate": 2e-07, |
| "loss": -0.021095672249794008, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667088866234, |
| "reward_std": 0.27402731478214265, |
| "rewards/MultiModalAccuracyORM": 0.21666667088866234, |
| "step": 1325, |
| "train_speed(iter/s)": 0.040834 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.35, |
| "epoch": 0.5373737373737374, |
| "grad_norm": 18.53838539123535, |
| "kl": 0.046075439453125, |
| "learning_rate": 2e-07, |
| "loss": 0.017172405123710634, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000089406967, |
| "reward_std": 0.2159452974796295, |
| "rewards/MultiModalAccuracyORM": 0.4500000089406967, |
| "step": 1330, |
| "train_speed(iter/s)": 0.040851 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 33.05, |
| "epoch": 0.5393939393939394, |
| "grad_norm": 7.678282737731934, |
| "kl": 0.0884857177734375, |
| "learning_rate": 2e-07, |
| "loss": 0.0011547883972525597, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5750000141561031, |
| "reward_std": 0.3044206708669662, |
| "rewards/MultiModalAccuracyORM": 0.5750000141561031, |
| "step": 1335, |
| "train_speed(iter/s)": 0.04087 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.7, |
| "epoch": 0.5414141414141415, |
| "grad_norm": 10.90495777130127, |
| "kl": 0.0806304931640625, |
| "learning_rate": 2e-07, |
| "loss": -0.017473408579826356, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000022351742, |
| "reward_std": 0.15518502295017242, |
| "rewards/MultiModalAccuracyORM": 0.2750000022351742, |
| "step": 1340, |
| "train_speed(iter/s)": 0.040877 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.55, |
| "epoch": 0.5434343434343434, |
| "grad_norm": 0.10261930525302887, |
| "kl": 0.060321044921875, |
| "learning_rate": 2e-07, |
| "loss": 0.0017479043453931808, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.152222341299057, |
| "rewards/MultiModalAccuracyORM": 0.33333334028720857, |
| "step": 1345, |
| "train_speed(iter/s)": 0.040892 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.75, |
| "epoch": 0.5454545454545454, |
| "grad_norm": 2.2841360569000244, |
| "kl": 0.024788665771484374, |
| "learning_rate": 2e-07, |
| "loss": -0.02739916443824768, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.2652174890041351, |
| "rewards/MultiModalAccuracyORM": 0.20833333656191827, |
| "step": 1350, |
| "train_speed(iter/s)": 0.040901 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.85, |
| "epoch": 0.5474747474747474, |
| "grad_norm": 13.731690406799316, |
| "kl": 0.0828125, |
| "learning_rate": 2e-07, |
| "loss": -0.0664910078048706, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333417773247, |
| "reward_std": 0.3362748771905899, |
| "rewards/MultiModalAccuracyORM": 0.4333333417773247, |
| "step": 1355, |
| "train_speed(iter/s)": 0.04092 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.95, |
| "epoch": 0.5494949494949495, |
| "grad_norm": 25.35189437866211, |
| "kl": 0.100750732421875, |
| "learning_rate": 2e-07, |
| "loss": -0.00892333835363388, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333432674408, |
| "reward_std": 0.2915389180183411, |
| "rewards/MultiModalAccuracyORM": 0.3083333432674408, |
| "step": 1360, |
| "train_speed(iter/s)": 0.04093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.1, |
| "epoch": 0.5515151515151515, |
| "grad_norm": 9.685708999633789, |
| "kl": 0.061480712890625, |
| "learning_rate": 2e-07, |
| "loss": 0.012898986041545869, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.21447905600070954, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 1365, |
| "train_speed(iter/s)": 0.040933 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.45, |
| "epoch": 0.5535353535353535, |
| "grad_norm": 0.28964653611183167, |
| "kl": 0.1938751220703125, |
| "learning_rate": 2e-07, |
| "loss": 0.01745934933423996, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666701436043, |
| "reward_std": 0.2489179015159607, |
| "rewards/MultiModalAccuracyORM": 0.3416666701436043, |
| "step": 1370, |
| "train_speed(iter/s)": 0.040944 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.25, |
| "epoch": 0.5555555555555556, |
| "grad_norm": 8.731843948364258, |
| "kl": 0.06651153564453124, |
| "learning_rate": 2e-07, |
| "loss": 0.03409457206726074, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333656191824, |
| "reward_std": 0.25566026866436004, |
| "rewards/MultiModalAccuracyORM": 0.35833333656191824, |
| "step": 1375, |
| "train_speed(iter/s)": 0.040953 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.2, |
| "epoch": 0.5575757575757576, |
| "grad_norm": 35.31602096557617, |
| "kl": 0.100604248046875, |
| "learning_rate": 2e-07, |
| "loss": -0.010587018728256226, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000111758709, |
| "reward_std": 0.25487024188041685, |
| "rewards/MultiModalAccuracyORM": 0.2500000111758709, |
| "step": 1380, |
| "train_speed(iter/s)": 0.040972 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.55, |
| "epoch": 0.5595959595959596, |
| "grad_norm": 1.9312275648117065, |
| "kl": 0.09021759033203125, |
| "learning_rate": 2e-07, |
| "loss": -0.012255148589611053, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667610406875, |
| "reward_std": 0.255184069275856, |
| "rewards/MultiModalAccuracyORM": 0.24166667610406875, |
| "step": 1385, |
| "train_speed(iter/s)": 0.040973 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.25, |
| "epoch": 0.5616161616161616, |
| "grad_norm": 30.091777801513672, |
| "kl": 0.08451480865478515, |
| "learning_rate": 2e-07, |
| "loss": -0.004190707206726074, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2500000074505806, |
| "reward_std": 0.2875886201858521, |
| "rewards/MultiModalAccuracyORM": 0.2500000074505806, |
| "step": 1390, |
| "train_speed(iter/s)": 0.040981 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.25, |
| "epoch": 0.5636363636363636, |
| "grad_norm": 5.909719467163086, |
| "kl": 0.16330108642578126, |
| "learning_rate": 2e-07, |
| "loss": -0.01449722945690155, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000000670552256, |
| "reward_std": 0.15821027159690856, |
| "rewards/MultiModalAccuracyORM": 0.35000000670552256, |
| "step": 1395, |
| "train_speed(iter/s)": 0.040992 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.9, |
| "epoch": 0.5656565656565656, |
| "grad_norm": 4.40855598449707, |
| "kl": 0.0266082763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.026001608371734618, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667088866234, |
| "reward_std": 0.3048968702554703, |
| "rewards/MultiModalAccuracyORM": 0.24166667088866234, |
| "step": 1400, |
| "train_speed(iter/s)": 0.041006 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.85, |
| "epoch": 0.5676767676767677, |
| "grad_norm": 0.061144277453422546, |
| "kl": 0.07353515625, |
| "learning_rate": 2e-07, |
| "loss": -0.010889561474323272, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667312383652, |
| "reward_std": 0.14815284609794616, |
| "rewards/MultiModalAccuracyORM": 0.24166667312383652, |
| "step": 1405, |
| "train_speed(iter/s)": 0.041018 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.6, |
| "epoch": 0.5696969696969697, |
| "grad_norm": 0.037721507251262665, |
| "kl": 0.087078857421875, |
| "learning_rate": 2e-07, |
| "loss": 0.004135938733816147, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45833333358168604, |
| "reward_std": 0.08109080791473389, |
| "rewards/MultiModalAccuracyORM": 0.45833333358168604, |
| "step": 1410, |
| "train_speed(iter/s)": 0.041023 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.2, |
| "epoch": 0.5717171717171717, |
| "grad_norm": 4.825331211090088, |
| "kl": 0.18311767578125, |
| "learning_rate": 2e-07, |
| "loss": 0.02725890576839447, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.24166667684912682, |
| "reward_std": 0.2338038921356201, |
| "rewards/MultiModalAccuracyORM": 0.24166667684912682, |
| "step": 1415, |
| "train_speed(iter/s)": 0.04103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 32.65, |
| "epoch": 0.5737373737373738, |
| "grad_norm": 1.8680031299591064, |
| "kl": 0.0274566650390625, |
| "learning_rate": 2e-07, |
| "loss": 0.0017455607652664185, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000074505806, |
| "reward_std": 0.12001575231552124, |
| "rewards/MultiModalAccuracyORM": 0.17500000074505806, |
| "step": 1420, |
| "train_speed(iter/s)": 0.041024 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.9, |
| "epoch": 0.5757575757575758, |
| "grad_norm": 3.193700075149536, |
| "kl": 0.305999755859375, |
| "learning_rate": 2e-07, |
| "loss": 0.046308600902557374, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000074505806, |
| "reward_std": 0.29177860021591184, |
| "rewards/MultiModalAccuracyORM": 0.4000000074505806, |
| "step": 1425, |
| "train_speed(iter/s)": 0.041037 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.95, |
| "epoch": 0.5777777777777777, |
| "grad_norm": 2.843719244003296, |
| "kl": 0.0330230712890625, |
| "learning_rate": 2e-07, |
| "loss": -0.04594253897666931, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.3008869707584381, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 1430, |
| "train_speed(iter/s)": 0.041054 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 20.0, |
| "epoch": 0.5797979797979798, |
| "grad_norm": 23.12917137145996, |
| "kl": 0.0993408203125, |
| "learning_rate": 2e-07, |
| "loss": 0.021137547492980958, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2166666716337204, |
| "reward_std": 0.2790306001901627, |
| "rewards/MultiModalAccuracyORM": 0.2166666716337204, |
| "step": 1435, |
| "train_speed(iter/s)": 0.041061 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.25, |
| "epoch": 0.5818181818181818, |
| "grad_norm": 17.79547882080078, |
| "kl": 0.1023834228515625, |
| "learning_rate": 2e-07, |
| "loss": 0.00415017232298851, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.20369119942188263, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 1440, |
| "train_speed(iter/s)": 0.041072 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.2, |
| "epoch": 0.5838383838383838, |
| "grad_norm": 15.119973182678223, |
| "kl": 0.11974754333496093, |
| "learning_rate": 2e-07, |
| "loss": -0.008057641983032226, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000089406967, |
| "reward_std": 0.2770525634288788, |
| "rewards/MultiModalAccuracyORM": 0.3250000089406967, |
| "step": 1445, |
| "train_speed(iter/s)": 0.041081 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.6, |
| "epoch": 0.5858585858585859, |
| "grad_norm": 0.13666389882564545, |
| "kl": 0.0672607421875, |
| "learning_rate": 2e-07, |
| "loss": -0.0010352015495300293, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333373069763, |
| "reward_std": 0.14996607303619386, |
| "rewards/MultiModalAccuracyORM": 0.2083333373069763, |
| "step": 1450, |
| "train_speed(iter/s)": 0.041098 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.3, |
| "epoch": 0.5878787878787879, |
| "grad_norm": 11.365659713745117, |
| "kl": 0.0847259521484375, |
| "learning_rate": 2e-07, |
| "loss": 0.014445498585700989, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1500000037252903, |
| "reward_std": 0.23955530524253846, |
| "rewards/MultiModalAccuracyORM": 0.1500000037252903, |
| "step": 1455, |
| "train_speed(iter/s)": 0.041108 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.5, |
| "epoch": 0.5898989898989899, |
| "grad_norm": 25.425418853759766, |
| "kl": 0.070880126953125, |
| "learning_rate": 2e-07, |
| "loss": 0.00023016731720417737, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666753590107, |
| "reward_std": 0.3265491545200348, |
| "rewards/MultiModalAccuracyORM": 0.3916666753590107, |
| "step": 1460, |
| "train_speed(iter/s)": 0.041127 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.35, |
| "epoch": 0.591919191919192, |
| "grad_norm": 11.779102325439453, |
| "kl": 0.07333221435546874, |
| "learning_rate": 2e-07, |
| "loss": 0.0254564106464386, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.16626566052436828, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 1465, |
| "train_speed(iter/s)": 0.041143 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.55, |
| "epoch": 0.593939393939394, |
| "grad_norm": 1.78038489818573, |
| "kl": 0.1328155517578125, |
| "learning_rate": 2e-07, |
| "loss": 0.008091837167739868, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000298023225, |
| "reward_std": 0.1293427586555481, |
| "rewards/MultiModalAccuracyORM": 0.27500000298023225, |
| "step": 1470, |
| "train_speed(iter/s)": 0.041154 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 35.25, |
| "epoch": 0.5959595959595959, |
| "grad_norm": 2.518378734588623, |
| "kl": 0.1015869140625, |
| "learning_rate": 2e-07, |
| "loss": -0.03122214078903198, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333358168602, |
| "reward_std": 0.3322981417179108, |
| "rewards/MultiModalAccuracyORM": 0.4333333358168602, |
| "step": 1475, |
| "train_speed(iter/s)": 0.041146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.5, |
| "epoch": 0.597979797979798, |
| "grad_norm": 20.898664474487305, |
| "kl": 0.1433135986328125, |
| "learning_rate": 2e-07, |
| "loss": -0.02608821392059326, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2250000059604645, |
| "reward_std": 0.287842845916748, |
| "rewards/MultiModalAccuracyORM": 0.2250000059604645, |
| "step": 1480, |
| "train_speed(iter/s)": 0.041162 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.8, |
| "epoch": 0.6, |
| "grad_norm": 0.11180847883224487, |
| "kl": 0.13046875, |
| "learning_rate": 2e-07, |
| "loss": 0.003093409538269043, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666666716337205, |
| "reward_std": 0.12937834858894348, |
| "rewards/MultiModalAccuracyORM": 0.21666666716337205, |
| "step": 1485, |
| "train_speed(iter/s)": 0.041171 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.85, |
| "epoch": 0.602020202020202, |
| "grad_norm": 12.01523494720459, |
| "kl": 0.187371826171875, |
| "learning_rate": 2e-07, |
| "loss": -0.008616887032985687, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667237877847, |
| "reward_std": 0.21550226211547852, |
| "rewards/MultiModalAccuracyORM": 0.29166667237877847, |
| "step": 1490, |
| "train_speed(iter/s)": 0.041188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.75, |
| "epoch": 0.604040404040404, |
| "grad_norm": 14.021830558776855, |
| "kl": 0.16456298828125, |
| "learning_rate": 2e-07, |
| "loss": 0.010373742878437042, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5166666701436042, |
| "reward_std": 0.21447905600070954, |
| "rewards/MultiModalAccuracyORM": 0.5166666701436042, |
| "step": 1495, |
| "train_speed(iter/s)": 0.041207 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 1.3669841289520264, |
| "learning_rate": 2e-07, |
| "loss": -0.011987817287445069, |
| "memory(GiB)": 104.49, |
| "step": 1500, |
| "train_speed(iter/s)": 0.041216 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 23.09000030040741, |
| "eval_kl": 0.12807769775390626, |
| "eval_loss": 0.0023684909101575613, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.42833334028720854, |
| "eval_reward_std": 0.21841024577617646, |
| "eval_rewards/MultiModalAccuracyORM": 0.42833334028720854, |
| "eval_runtime": 243.0786, |
| "eval_samples_per_second": 0.206, |
| "eval_steps_per_second": 0.021, |
| "step": 1500 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.275, |
| "epoch": 0.6080808080808081, |
| "grad_norm": 10.859317779541016, |
| "kl": 0.1035552978515625, |
| "learning_rate": 2e-07, |
| "loss": -0.011110000312328339, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000000447034835, |
| "reward_std": 0.2066851645708084, |
| "rewards/MultiModalAccuracyORM": 0.35000000447034835, |
| "step": 1505, |
| "train_speed(iter/s)": 0.040874 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 61.45, |
| "epoch": 0.6101010101010101, |
| "grad_norm": 0.055811017751693726, |
| "kl": 0.03581314086914063, |
| "learning_rate": 2e-07, |
| "loss": 0.04541417956352234, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666731238365, |
| "reward_std": 0.33700530230998993, |
| "rewards/MultiModalAccuracyORM": 0.3916666731238365, |
| "step": 1510, |
| "train_speed(iter/s)": 0.040868 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.1, |
| "epoch": 0.6121212121212121, |
| "grad_norm": 2.9291131496429443, |
| "kl": 0.076611328125, |
| "learning_rate": 2e-07, |
| "loss": 0.0033688426017761232, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000298023225, |
| "reward_std": 0.22297748029232026, |
| "rewards/MultiModalAccuracyORM": 0.40000000298023225, |
| "step": 1515, |
| "train_speed(iter/s)": 0.040893 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 39.8, |
| "epoch": 0.6141414141414141, |
| "grad_norm": 10.698760032653809, |
| "kl": 0.024103546142578126, |
| "learning_rate": 2e-07, |
| "loss": 0.033906325697898865, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1416666716337204, |
| "reward_std": 0.24487241208553315, |
| "rewards/MultiModalAccuracyORM": 0.1416666716337204, |
| "step": 1520, |
| "train_speed(iter/s)": 0.040902 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.95, |
| "epoch": 0.6161616161616161, |
| "grad_norm": 5.847660541534424, |
| "kl": 0.141815185546875, |
| "learning_rate": 2e-07, |
| "loss": -0.014752772450447083, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000447034834, |
| "reward_std": 0.27756677865982055, |
| "rewards/MultiModalAccuracyORM": 0.40000000447034834, |
| "step": 1525, |
| "train_speed(iter/s)": 0.04091 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.85, |
| "epoch": 0.6181818181818182, |
| "grad_norm": 2.933770179748535, |
| "kl": 0.1540740966796875, |
| "learning_rate": 2e-07, |
| "loss": 0.021346482634544372, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500001043081284, |
| "reward_std": 0.3767612546682358, |
| "rewards/MultiModalAccuracyORM": 0.32500001043081284, |
| "step": 1530, |
| "train_speed(iter/s)": 0.040919 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 20.35, |
| "epoch": 0.6202020202020202, |
| "grad_norm": 6.487882614135742, |
| "kl": 0.08126373291015625, |
| "learning_rate": 2e-07, |
| "loss": -0.02819029986858368, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000037252903, |
| "reward_std": 0.2875886201858521, |
| "rewards/MultiModalAccuracyORM": 0.4000000037252903, |
| "step": 1535, |
| "train_speed(iter/s)": 0.040926 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.7, |
| "epoch": 0.6222222222222222, |
| "grad_norm": 0.1822008639574051, |
| "kl": 0.244976806640625, |
| "learning_rate": 2e-07, |
| "loss": 0.02670127749443054, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.43333333879709246, |
| "reward_std": 0.14589657187461852, |
| "rewards/MultiModalAccuracyORM": 0.43333333879709246, |
| "step": 1540, |
| "train_speed(iter/s)": 0.040936 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 54.35, |
| "epoch": 0.6242424242424243, |
| "grad_norm": 5.22224235534668, |
| "kl": 0.087286376953125, |
| "learning_rate": 2e-07, |
| "loss": 0.011146068572998047, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.39166667237877845, |
| "reward_std": 0.39465543925762175, |
| "rewards/MultiModalAccuracyORM": 0.39166667237877845, |
| "step": 1545, |
| "train_speed(iter/s)": 0.040941 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 46.65, |
| "epoch": 0.6262626262626263, |
| "grad_norm": 12.465606689453125, |
| "kl": 0.11739501953125, |
| "learning_rate": 2e-07, |
| "loss": 0.01348254531621933, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000447034837, |
| "reward_std": 0.18332211077213287, |
| "rewards/MultiModalAccuracyORM": 0.37500000447034837, |
| "step": 1550, |
| "train_speed(iter/s)": 0.040941 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.05, |
| "epoch": 0.6282828282828283, |
| "grad_norm": 0.03528100252151489, |
| "kl": 0.059906005859375, |
| "learning_rate": 2e-07, |
| "loss": 0.002536106109619141, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000223517417, |
| "reward_std": 0.12558708488941192, |
| "rewards/MultiModalAccuracyORM": 0.32500000223517417, |
| "step": 1555, |
| "train_speed(iter/s)": 0.040957 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.5, |
| "epoch": 0.6303030303030303, |
| "grad_norm": 15.021883010864258, |
| "kl": 0.11079330444335937, |
| "learning_rate": 2e-07, |
| "loss": 0.0029231052845716476, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333380520344, |
| "reward_std": 0.20973873138427734, |
| "rewards/MultiModalAccuracyORM": 0.4333333380520344, |
| "step": 1560, |
| "train_speed(iter/s)": 0.040974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.1, |
| "epoch": 0.6323232323232323, |
| "grad_norm": 2.5578255653381348, |
| "kl": 0.04172821044921875, |
| "learning_rate": 2e-07, |
| "loss": 0.004573901742696762, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.12500000447034837, |
| "reward_std": 0.18087121844291687, |
| "rewards/MultiModalAccuracyORM": 0.12500000447034837, |
| "step": 1565, |
| "train_speed(iter/s)": 0.040988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.8, |
| "epoch": 0.6343434343434343, |
| "grad_norm": 22.243240356445312, |
| "kl": 0.158673095703125, |
| "learning_rate": 2e-07, |
| "loss": -0.008480211347341537, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666667759418485, |
| "reward_std": 0.28480601906776426, |
| "rewards/MultiModalAccuracyORM": 0.41666667759418485, |
| "step": 1570, |
| "train_speed(iter/s)": 0.040998 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.0, |
| "epoch": 0.6363636363636364, |
| "grad_norm": 25.038570404052734, |
| "kl": 0.1517974853515625, |
| "learning_rate": 2e-07, |
| "loss": 0.04977948367595673, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.22625695466995238, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 1575, |
| "train_speed(iter/s)": 0.041005 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.15, |
| "epoch": 0.6383838383838384, |
| "grad_norm": 0.11025875806808472, |
| "kl": 0.0567169189453125, |
| "learning_rate": 2e-07, |
| "loss": 0.004630526155233383, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666679084301, |
| "reward_std": 0.13032740950584412, |
| "rewards/MultiModalAccuracyORM": 0.3916666679084301, |
| "step": 1580, |
| "train_speed(iter/s)": 0.041022 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.9, |
| "epoch": 0.6404040404040404, |
| "grad_norm": 8.77802562713623, |
| "kl": 0.06422119140625, |
| "learning_rate": 2e-07, |
| "loss": -0.002487625740468502, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666708886623, |
| "reward_std": 0.2822715133428574, |
| "rewards/MultiModalAccuracyORM": 0.2916666708886623, |
| "step": 1585, |
| "train_speed(iter/s)": 0.041027 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.1, |
| "epoch": 0.6424242424242425, |
| "grad_norm": 0.061026524752378464, |
| "kl": 0.181072998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.012957209348678589, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333805203437, |
| "reward_std": 0.11928532719612121, |
| "rewards/MultiModalAccuracyORM": 0.28333333805203437, |
| "step": 1590, |
| "train_speed(iter/s)": 0.041041 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.6, |
| "epoch": 0.6444444444444445, |
| "grad_norm": 5.596570014953613, |
| "kl": 0.17645263671875, |
| "learning_rate": 2e-07, |
| "loss": -0.0008578440174460411, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666753590107, |
| "reward_std": 0.3071531385183334, |
| "rewards/MultiModalAccuracyORM": 0.2666666753590107, |
| "step": 1595, |
| "train_speed(iter/s)": 0.041048 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 71.7, |
| "epoch": 0.6464646464646465, |
| "grad_norm": 26.054533004760742, |
| "kl": 0.11879425048828125, |
| "learning_rate": 2e-07, |
| "loss": 0.007277928292751312, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000894069674, |
| "reward_std": 0.2732968896627426, |
| "rewards/MultiModalAccuracyORM": 0.37500000894069674, |
| "step": 1600, |
| "train_speed(iter/s)": 0.041045 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 44.45, |
| "epoch": 0.6484848484848484, |
| "grad_norm": 0.11397194862365723, |
| "kl": 0.0313624382019043, |
| "learning_rate": 2e-07, |
| "loss": 0.0012240668758749962, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333334028720857, |
| "reward_std": 0.152222341299057, |
| "rewards/MultiModalAccuracyORM": 0.33333334028720857, |
| "step": 1605, |
| "train_speed(iter/s)": 0.041049 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 54.7, |
| "epoch": 0.6505050505050505, |
| "grad_norm": 0.8132848739624023, |
| "kl": 0.099078369140625, |
| "learning_rate": 2e-07, |
| "loss": 0.008613920211791993, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.13333333432674407, |
| "reward_std": 0.20110656023025514, |
| "rewards/MultiModalAccuracyORM": 0.13333333432674407, |
| "step": 1610, |
| "train_speed(iter/s)": 0.041055 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 106.2, |
| "epoch": 0.6525252525252525, |
| "grad_norm": 2.1414718627929688, |
| "kl": 0.05146484375, |
| "learning_rate": 2e-07, |
| "loss": 0.0011494815349578857, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333333507180213, |
| "reward_std": 0.25270916521549225, |
| "rewards/MultiModalAccuracyORM": 0.33333333507180213, |
| "step": 1615, |
| "train_speed(iter/s)": 0.041049 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.7, |
| "epoch": 0.6545454545454545, |
| "grad_norm": 2.636408567428589, |
| "kl": 0.05029296875, |
| "learning_rate": 2e-07, |
| "loss": -0.02351543605327606, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5083333447575569, |
| "reward_std": 0.2792848199605942, |
| "rewards/MultiModalAccuracyORM": 0.5083333447575569, |
| "step": 1620, |
| "train_speed(iter/s)": 0.041065 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.3, |
| "epoch": 0.6565656565656566, |
| "grad_norm": 3.0985336303710938, |
| "kl": 0.065228271484375, |
| "learning_rate": 2e-07, |
| "loss": -0.014748664200305938, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4833333395421505, |
| "reward_std": 0.16225576102733613, |
| "rewards/MultiModalAccuracyORM": 0.4833333395421505, |
| "step": 1625, |
| "train_speed(iter/s)": 0.041069 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.65, |
| "epoch": 0.6585858585858586, |
| "grad_norm": 9.992680549621582, |
| "kl": 0.16975555419921876, |
| "learning_rate": 2e-07, |
| "loss": 0.008018460124731064, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.21368903517723084, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 1630, |
| "train_speed(iter/s)": 0.041077 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.95, |
| "epoch": 0.6606060606060606, |
| "grad_norm": 47.361576080322266, |
| "kl": 0.125982666015625, |
| "learning_rate": 2e-07, |
| "loss": 0.015030686557292939, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4666666753590107, |
| "reward_std": 0.2340581238269806, |
| "rewards/MultiModalAccuracyORM": 0.4666666753590107, |
| "step": 1635, |
| "train_speed(iter/s)": 0.041091 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.05, |
| "epoch": 0.6626262626262627, |
| "grad_norm": 6.931950569152832, |
| "kl": 0.16407470703125, |
| "learning_rate": 2e-07, |
| "loss": -0.012672655284404755, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666746139526, |
| "reward_std": 0.215248042345047, |
| "rewards/MultiModalAccuracyORM": 0.4166666746139526, |
| "step": 1640, |
| "train_speed(iter/s)": 0.041096 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.3, |
| "epoch": 0.6646464646464646, |
| "grad_norm": 0.08681845664978027, |
| "kl": 0.1269195556640625, |
| "learning_rate": 2e-07, |
| "loss": -0.0032407425343990324, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5083333358168602, |
| "reward_std": 0.13338824808597566, |
| "rewards/MultiModalAccuracyORM": 0.5083333358168602, |
| "step": 1645, |
| "train_speed(iter/s)": 0.041111 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 45.7, |
| "epoch": 0.6666666666666666, |
| "grad_norm": 3.8581395149230957, |
| "kl": 0.121484375, |
| "learning_rate": 2e-07, |
| "loss": 0.008351793140172958, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333432674407, |
| "reward_std": 0.2581467509269714, |
| "rewards/MultiModalAccuracyORM": 0.28333333432674407, |
| "step": 1650, |
| "train_speed(iter/s)": 0.041103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.15, |
| "epoch": 0.6686868686868687, |
| "grad_norm": 17.391639709472656, |
| "kl": 0.13189697265625, |
| "learning_rate": 2e-07, |
| "loss": 0.056326770782470705, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333410322666, |
| "reward_std": 0.3480859398841858, |
| "rewards/MultiModalAccuracyORM": 0.4083333410322666, |
| "step": 1655, |
| "train_speed(iter/s)": 0.041102 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.25, |
| "epoch": 0.6707070707070707, |
| "grad_norm": 7.648516654968262, |
| "kl": 0.2052001953125, |
| "learning_rate": 2e-07, |
| "loss": -0.00421803817152977, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3500000037252903, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.3500000037252903, |
| "step": 1660, |
| "train_speed(iter/s)": 0.041107 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 35.35, |
| "epoch": 0.6727272727272727, |
| "grad_norm": 1.1766724586486816, |
| "kl": 0.0945709228515625, |
| "learning_rate": 2e-07, |
| "loss": 0.013910901546478272, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667684912684, |
| "reward_std": 0.24487241804599763, |
| "rewards/MultiModalAccuracyORM": 0.29166667684912684, |
| "step": 1665, |
| "train_speed(iter/s)": 0.041117 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.35, |
| "epoch": 0.6747474747474748, |
| "grad_norm": 4.918646335601807, |
| "kl": 0.023187255859375, |
| "learning_rate": 2e-07, |
| "loss": -0.009105654805898667, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333507180213, |
| "reward_std": 0.2074824631214142, |
| "rewards/MultiModalAccuracyORM": 0.20833333507180213, |
| "step": 1670, |
| "train_speed(iter/s)": 0.041129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.25, |
| "epoch": 0.6767676767676768, |
| "grad_norm": 10.536828994750977, |
| "kl": 0.0798187255859375, |
| "learning_rate": 2e-07, |
| "loss": 0.02544976770877838, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333380520344, |
| "reward_std": 0.16451202929019929, |
| "rewards/MultiModalAccuracyORM": 0.2583333380520344, |
| "step": 1675, |
| "train_speed(iter/s)": 0.041135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 52.3, |
| "epoch": 0.6787878787878788, |
| "grad_norm": 5.117887020111084, |
| "kl": 0.02090301513671875, |
| "learning_rate": 2e-07, |
| "loss": 0.04579094052314758, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000447034836, |
| "reward_std": 0.31495430171489713, |
| "rewards/MultiModalAccuracyORM": 0.30000000447034836, |
| "step": 1680, |
| "train_speed(iter/s)": 0.041133 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.7, |
| "epoch": 0.6808080808080809, |
| "grad_norm": 8.01219367980957, |
| "kl": 0.1265289306640625, |
| "learning_rate": 2e-07, |
| "loss": 0.019950807094573975, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333380520344, |
| "reward_std": 0.16852192878723143, |
| "rewards/MultiModalAccuracyORM": 0.4333333380520344, |
| "step": 1685, |
| "train_speed(iter/s)": 0.041146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.9, |
| "epoch": 0.6828282828282828, |
| "grad_norm": 7.546853065490723, |
| "kl": 0.0402618408203125, |
| "learning_rate": 2e-07, |
| "loss": 0.030116382241249084, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.42500000447034836, |
| "reward_std": 0.22629254460334777, |
| "rewards/MultiModalAccuracyORM": 0.42500000447034836, |
| "step": 1690, |
| "train_speed(iter/s)": 0.041159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.2, |
| "epoch": 0.6848484848484848, |
| "grad_norm": 8.680946350097656, |
| "kl": 0.1186279296875, |
| "learning_rate": 2e-07, |
| "loss": -0.014576731622219086, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45833333507180213, |
| "reward_std": 0.2074824631214142, |
| "rewards/MultiModalAccuracyORM": 0.45833333507180213, |
| "step": 1695, |
| "train_speed(iter/s)": 0.041143 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.65, |
| "epoch": 0.6868686868686869, |
| "grad_norm": 33.545352935791016, |
| "kl": 0.11261825561523438, |
| "learning_rate": 2e-07, |
| "loss": 0.004046386480331421, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3250000014901161, |
| "reward_std": 0.18561154305934907, |
| "rewards/MultiModalAccuracyORM": 0.3250000014901161, |
| "step": 1700, |
| "train_speed(iter/s)": 0.041159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 69.85, |
| "epoch": 0.6888888888888889, |
| "grad_norm": 13.335136413574219, |
| "kl": 0.11529541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.0011761213652789592, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666701436043, |
| "reward_std": 0.18482151627540588, |
| "rewards/MultiModalAccuracyORM": 0.3166666701436043, |
| "step": 1705, |
| "train_speed(iter/s)": 0.041157 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.45, |
| "epoch": 0.6909090909090909, |
| "grad_norm": 14.620392799377441, |
| "kl": 0.07541313171386718, |
| "learning_rate": 2e-07, |
| "loss": 0.01065676361322403, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1916666716337204, |
| "reward_std": 0.24961273670196532, |
| "rewards/MultiModalAccuracyORM": 0.1916666716337204, |
| "step": 1710, |
| "train_speed(iter/s)": 0.041164 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.2, |
| "epoch": 0.692929292929293, |
| "grad_norm": 1.2891874313354492, |
| "kl": 0.13163909912109376, |
| "learning_rate": 2e-07, |
| "loss": 0.02046767473220825, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4916666731238365, |
| "reward_std": 0.21374863088130952, |
| "rewards/MultiModalAccuracyORM": 0.4916666731238365, |
| "step": 1715, |
| "train_speed(iter/s)": 0.041157 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.4, |
| "epoch": 0.694949494949495, |
| "grad_norm": 3.101806879043579, |
| "kl": 0.22337646484375, |
| "learning_rate": 2e-07, |
| "loss": 0.008609502017498017, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500001341104505, |
| "reward_std": 0.3003751873970032, |
| "rewards/MultiModalAccuracyORM": 0.37500001341104505, |
| "step": 1720, |
| "train_speed(iter/s)": 0.041168 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.2, |
| "epoch": 0.696969696969697, |
| "grad_norm": 17.069448471069336, |
| "kl": 0.10420684814453125, |
| "learning_rate": 2e-07, |
| "loss": -0.020038720965385438, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000298023224, |
| "reward_std": 0.14433756470680237, |
| "rewards/MultiModalAccuracyORM": 0.32500000298023224, |
| "step": 1725, |
| "train_speed(iter/s)": 0.041178 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.0, |
| "epoch": 0.6989898989898989, |
| "grad_norm": 2.795525074005127, |
| "kl": 0.0689239501953125, |
| "learning_rate": 2e-07, |
| "loss": 0.022227957844734192, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000074505805, |
| "reward_std": 0.15824586153030396, |
| "rewards/MultiModalAccuracyORM": 0.22500000074505805, |
| "step": 1730, |
| "train_speed(iter/s)": 0.041179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 63.75, |
| "epoch": 0.701010101010101, |
| "grad_norm": 2.3581957817077637, |
| "kl": 0.04788818359375, |
| "learning_rate": 2e-07, |
| "loss": 0.033317530155181886, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000000298023226, |
| "reward_std": 0.2837563753128052, |
| "rewards/MultiModalAccuracyORM": 0.35000000298023226, |
| "step": 1735, |
| "train_speed(iter/s)": 0.04118 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.5, |
| "epoch": 0.703030303030303, |
| "grad_norm": 2.782379627227783, |
| "kl": 0.080255126953125, |
| "learning_rate": 2e-07, |
| "loss": -0.012095755338668824, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.18333333879709243, |
| "reward_std": 0.28154108822345736, |
| "rewards/MultiModalAccuracyORM": 0.18333333879709243, |
| "step": 1740, |
| "train_speed(iter/s)": 0.041192 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.2, |
| "epoch": 0.705050505050505, |
| "grad_norm": 3.129946708679199, |
| "kl": 0.04556884765625, |
| "learning_rate": 2e-07, |
| "loss": 0.037814974784851074, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000000223517417, |
| "reward_std": 0.21378422081470488, |
| "rewards/MultiModalAccuracyORM": 0.45000000223517417, |
| "step": 1745, |
| "train_speed(iter/s)": 0.041197 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 2.4902050495147705, |
| "learning_rate": 2e-07, |
| "loss": 0.0172103151679039, |
| "memory(GiB)": 104.49, |
| "step": 1750, |
| "train_speed(iter/s)": 0.041202 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 34.29833379745483, |
| "eval_kl": 0.10184234619140625, |
| "eval_loss": 0.012326983734965324, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.4183333376049995, |
| "eval_reward_std": 0.1789151507616043, |
| "eval_rewards/MultiModalAccuracyORM": 0.4183333376049995, |
| "eval_runtime": 267.6806, |
| "eval_samples_per_second": 0.187, |
| "eval_steps_per_second": 0.019, |
| "step": 1750 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.0, |
| "epoch": 0.7090909090909091, |
| "grad_norm": 14.173089981079102, |
| "kl": 0.10649490356445312, |
| "learning_rate": 2e-07, |
| "loss": 0.007458774745464325, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28750000670552256, |
| "reward_std": 0.23671061247587205, |
| "rewards/MultiModalAccuracyORM": 0.28750000670552256, |
| "step": 1755, |
| "train_speed(iter/s)": 0.040873 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.6, |
| "epoch": 0.7111111111111111, |
| "grad_norm": 2.1408114433288574, |
| "kl": 0.066253662109375, |
| "learning_rate": 2e-07, |
| "loss": 0.027722400426864625, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.16666667237877847, |
| "reward_std": 0.2669951319694519, |
| "rewards/MultiModalAccuracyORM": 0.16666667237877847, |
| "step": 1760, |
| "train_speed(iter/s)": 0.040871 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.25, |
| "epoch": 0.7131313131313132, |
| "grad_norm": 24.069496154785156, |
| "kl": 0.0887176513671875, |
| "learning_rate": 2e-07, |
| "loss": 0.00502915009856224, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333395421505, |
| "reward_std": 0.18407654762268066, |
| "rewards/MultiModalAccuracyORM": 0.2583333395421505, |
| "step": 1765, |
| "train_speed(iter/s)": 0.040877 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 26.2, |
| "epoch": 0.7151515151515152, |
| "grad_norm": 2.3050827980041504, |
| "kl": 0.2020782470703125, |
| "learning_rate": 2e-07, |
| "loss": 0.016819214820861815, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000022351742, |
| "reward_std": 0.174764084815979, |
| "rewards/MultiModalAccuracyORM": 0.2750000022351742, |
| "step": 1770, |
| "train_speed(iter/s)": 0.040888 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.25, |
| "epoch": 0.7171717171717171, |
| "grad_norm": 8.913907051086426, |
| "kl": 0.137213134765625, |
| "learning_rate": 2e-07, |
| "loss": -0.006190218776464462, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5250000029802322, |
| "reward_std": 0.22078081369400024, |
| "rewards/MultiModalAccuracyORM": 0.5250000029802322, |
| "step": 1775, |
| "train_speed(iter/s)": 0.040903 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 30.05, |
| "epoch": 0.7191919191919192, |
| "grad_norm": 2.8246963024139404, |
| "kl": 0.11649169921875, |
| "learning_rate": 2e-07, |
| "loss": -0.06523974537849427, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35833333507180215, |
| "reward_std": 0.22629254460334777, |
| "rewards/MultiModalAccuracyORM": 0.35833333507180215, |
| "step": 1780, |
| "train_speed(iter/s)": 0.040913 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.3, |
| "epoch": 0.7212121212121212, |
| "grad_norm": 7.319549083709717, |
| "kl": 0.100701904296875, |
| "learning_rate": 2e-07, |
| "loss": 0.03789505362510681, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000149011612, |
| "reward_std": 0.2712534427642822, |
| "rewards/MultiModalAccuracyORM": 0.2750000149011612, |
| "step": 1785, |
| "train_speed(iter/s)": 0.040921 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.75, |
| "epoch": 0.7232323232323232, |
| "grad_norm": 8.2145357131958, |
| "kl": 0.13018798828125, |
| "learning_rate": 2e-07, |
| "loss": -0.021410945057868957, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666738688946, |
| "reward_std": 0.2325587123632431, |
| "rewards/MultiModalAccuracyORM": 0.3916666738688946, |
| "step": 1790, |
| "train_speed(iter/s)": 0.040929 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.7, |
| "epoch": 0.7252525252525253, |
| "grad_norm": 8.516419410705566, |
| "kl": 0.1542633056640625, |
| "learning_rate": 2e-07, |
| "loss": 0.02146460711956024, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000059604645, |
| "reward_std": 0.21823472976684571, |
| "rewards/MultiModalAccuracyORM": 0.3000000059604645, |
| "step": 1795, |
| "train_speed(iter/s)": 0.040941 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.8, |
| "epoch": 0.7272727272727273, |
| "grad_norm": 10.487430572509766, |
| "kl": 0.2330535888671875, |
| "learning_rate": 2e-07, |
| "loss": 0.03371854722499847, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000521540644, |
| "reward_std": 0.16925235390663146, |
| "rewards/MultiModalAccuracyORM": 0.37500000521540644, |
| "step": 1800, |
| "train_speed(iter/s)": 0.040952 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.9, |
| "epoch": 0.7292929292929293, |
| "grad_norm": 2.5021793842315674, |
| "kl": 0.053016281127929686, |
| "learning_rate": 2e-07, |
| "loss": -0.005027930065989494, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.358333333581686, |
| "reward_std": 0.1193209171295166, |
| "rewards/MultiModalAccuracyORM": 0.358333333581686, |
| "step": 1805, |
| "train_speed(iter/s)": 0.040965 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.3, |
| "epoch": 0.7313131313131314, |
| "grad_norm": 9.409316062927246, |
| "kl": 0.077154541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.00013190507888793945, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333343267441, |
| "reward_std": 0.14188667237758637, |
| "rewards/MultiModalAccuracyORM": 0.3083333343267441, |
| "step": 1810, |
| "train_speed(iter/s)": 0.040974 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.2, |
| "epoch": 0.7333333333333333, |
| "grad_norm": 8.413249015808105, |
| "kl": 0.06329345703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0067844375967979435, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333417773247, |
| "reward_std": 0.2878072619438171, |
| "rewards/MultiModalAccuracyORM": 0.3833333417773247, |
| "step": 1815, |
| "train_speed(iter/s)": 0.040988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 42.4, |
| "epoch": 0.7353535353535353, |
| "grad_norm": 3.3386476039886475, |
| "kl": 0.0814666748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.020126067101955414, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.37500000596046446, |
| "reward_std": 0.18087121844291687, |
| "rewards/MultiModalAccuracyORM": 0.37500000596046446, |
| "step": 1820, |
| "train_speed(iter/s)": 0.041 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.6, |
| "epoch": 0.7373737373737373, |
| "grad_norm": 11.123106956481934, |
| "kl": 0.13977203369140626, |
| "learning_rate": 2e-07, |
| "loss": 0.0059658966958522795, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.15821027159690856, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 1825, |
| "train_speed(iter/s)": 0.041002 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 23.75, |
| "epoch": 0.7393939393939394, |
| "grad_norm": 4.5245361328125, |
| "kl": 0.098736572265625, |
| "learning_rate": 2e-07, |
| "loss": -0.024525515735149384, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666731238365, |
| "reward_std": 0.20995736718177796, |
| "rewards/MultiModalAccuracyORM": 0.2666666731238365, |
| "step": 1830, |
| "train_speed(iter/s)": 0.040989 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.1, |
| "epoch": 0.7414141414141414, |
| "grad_norm": 0.7691475749015808, |
| "kl": 0.0991119384765625, |
| "learning_rate": 2e-07, |
| "loss": 0.039085444808006284, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4250000029802322, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.4250000029802322, |
| "step": 1835, |
| "train_speed(iter/s)": 0.040998 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.0, |
| "epoch": 0.7434343434343434, |
| "grad_norm": 0.2410029023885727, |
| "kl": 0.17838897705078124, |
| "learning_rate": 2e-07, |
| "loss": 0.04514871537685394, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.6166666708886623, |
| "reward_std": 0.13258367776870728, |
| "rewards/MultiModalAccuracyORM": 0.6166666708886623, |
| "step": 1840, |
| "train_speed(iter/s)": 0.040995 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.9, |
| "epoch": 0.7454545454545455, |
| "grad_norm": 12.146939277648926, |
| "kl": 0.097296142578125, |
| "learning_rate": 2e-07, |
| "loss": 0.02126455307006836, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333656191827, |
| "reward_std": 0.17657731771469115, |
| "rewards/MultiModalAccuracyORM": 0.20833333656191827, |
| "step": 1845, |
| "train_speed(iter/s)": 0.041 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.0, |
| "epoch": 0.7474747474747475, |
| "grad_norm": 10.014187812805176, |
| "kl": 0.12047119140625, |
| "learning_rate": 2e-07, |
| "loss": 0.0045259218662977215, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.46666667237877846, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.46666667237877846, |
| "step": 1850, |
| "train_speed(iter/s)": 0.041019 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.3, |
| "epoch": 0.7494949494949495, |
| "grad_norm": 0.34578633308410645, |
| "kl": 0.13382987976074218, |
| "learning_rate": 2e-07, |
| "loss": 0.003971926495432853, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1833333358168602, |
| "reward_std": 0.1356445223093033, |
| "rewards/MultiModalAccuracyORM": 0.1833333358168602, |
| "step": 1855, |
| "train_speed(iter/s)": 0.041027 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.05, |
| "epoch": 0.7515151515151515, |
| "grad_norm": 17.808372497558594, |
| "kl": 0.025757217407226564, |
| "learning_rate": 2e-07, |
| "loss": 0.035965240001678465, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334252238274, |
| "reward_std": 0.19713521599769593, |
| "rewards/MultiModalAccuracyORM": 0.28333334252238274, |
| "step": 1860, |
| "train_speed(iter/s)": 0.041022 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.8, |
| "epoch": 0.7535353535353535, |
| "grad_norm": 24.15494155883789, |
| "kl": 0.0437255859375, |
| "learning_rate": 2e-07, |
| "loss": -0.06361854076385498, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000096857548, |
| "reward_std": 0.36670139729976653, |
| "rewards/MultiModalAccuracyORM": 0.4000000096857548, |
| "step": 1865, |
| "train_speed(iter/s)": 0.041031 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.85, |
| "epoch": 0.7555555555555555, |
| "grad_norm": 80.81800079345703, |
| "kl": 0.08274688720703124, |
| "learning_rate": 2e-07, |
| "loss": 0.003989287465810776, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333395421505, |
| "reward_std": 0.15846449732780457, |
| "rewards/MultiModalAccuracyORM": 0.3083333395421505, |
| "step": 1870, |
| "train_speed(iter/s)": 0.041038 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.5, |
| "epoch": 0.7575757575757576, |
| "grad_norm": 14.617817878723145, |
| "kl": 0.090728759765625, |
| "learning_rate": 2e-07, |
| "loss": -0.0045210480690002445, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666731238365, |
| "reward_std": 0.19337954819202424, |
| "rewards/MultiModalAccuracyORM": 0.2666666731238365, |
| "step": 1875, |
| "train_speed(iter/s)": 0.041048 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.65, |
| "epoch": 0.7595959595959596, |
| "grad_norm": 13.89445972442627, |
| "kl": 0.13492431640625, |
| "learning_rate": 2e-07, |
| "loss": 0.012078547477722168, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333805203438, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.20833333805203438, |
| "step": 1880, |
| "train_speed(iter/s)": 0.041062 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 61.05, |
| "epoch": 0.7616161616161616, |
| "grad_norm": 11.715389251708984, |
| "kl": 0.1376861572265625, |
| "learning_rate": 2e-07, |
| "loss": -0.014951804280281067, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000298023225, |
| "reward_std": 0.19337954223155976, |
| "rewards/MultiModalAccuracyORM": 0.40000000298023225, |
| "step": 1885, |
| "train_speed(iter/s)": 0.041063 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.4, |
| "epoch": 0.7636363636363637, |
| "grad_norm": 0.07281157374382019, |
| "kl": 0.095611572265625, |
| "learning_rate": 2e-07, |
| "loss": 0.012891271710395813, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.483333333581686, |
| "reward_std": 0.12631751000881195, |
| "rewards/MultiModalAccuracyORM": 0.483333333581686, |
| "step": 1890, |
| "train_speed(iter/s)": 0.041073 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 57.6, |
| "epoch": 0.7656565656565657, |
| "grad_norm": 1.9145233631134033, |
| "kl": 0.19044036865234376, |
| "learning_rate": 2e-07, |
| "loss": -0.03062499463558197, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.35000000447034835, |
| "reward_std": 0.24490800201892854, |
| "rewards/MultiModalAccuracyORM": 0.35000000447034835, |
| "step": 1895, |
| "train_speed(iter/s)": 0.041073 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.8, |
| "epoch": 0.7676767676767676, |
| "grad_norm": 22.877309799194336, |
| "kl": 0.161077880859375, |
| "learning_rate": 2e-07, |
| "loss": 0.008297159522771835, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.09041781425476074, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 1900, |
| "train_speed(iter/s)": 0.041078 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.0, |
| "epoch": 0.7696969696969697, |
| "grad_norm": 21.666425704956055, |
| "kl": 0.1980316162109375, |
| "learning_rate": 2e-07, |
| "loss": 0.020768019556999206, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.46666666865348816, |
| "reward_std": 0.24114990234375, |
| "rewards/MultiModalAccuracyORM": 0.46666666865348816, |
| "step": 1905, |
| "train_speed(iter/s)": 0.041093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.25, |
| "epoch": 0.7717171717171717, |
| "grad_norm": 22.925674438476562, |
| "kl": 0.0932861328125, |
| "learning_rate": 2e-07, |
| "loss": 0.009479768574237823, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30833334028720855, |
| "reward_std": 0.2466856449842453, |
| "rewards/MultiModalAccuracyORM": 0.30833334028720855, |
| "step": 1910, |
| "train_speed(iter/s)": 0.0411 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.65, |
| "epoch": 0.7737373737373737, |
| "grad_norm": 0.14844609797000885, |
| "kl": 0.232122802734375, |
| "learning_rate": 2e-07, |
| "loss": 0.010550656914710998, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.6166666686534882, |
| "reward_std": 0.16454761922359468, |
| "rewards/MultiModalAccuracyORM": 0.6166666686534882, |
| "step": 1915, |
| "train_speed(iter/s)": 0.041111 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.85, |
| "epoch": 0.7757575757575758, |
| "grad_norm": 13.482421875, |
| "kl": 0.120068359375, |
| "learning_rate": 2e-07, |
| "loss": 0.022914706170558928, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4500000044703484, |
| "reward_std": 0.25292780101299284, |
| "rewards/MultiModalAccuracyORM": 0.4500000044703484, |
| "step": 1920, |
| "train_speed(iter/s)": 0.041122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.7, |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.19085177779197693, |
| "kl": 0.14432373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.020079278945922853, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5416666671633721, |
| "reward_std": 0.18859823644161225, |
| "rewards/MultiModalAccuracyORM": 0.5416666671633721, |
| "step": 1925, |
| "train_speed(iter/s)": 0.04113 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 57.35, |
| "epoch": 0.7797979797979798, |
| "grad_norm": 0.04123455658555031, |
| "kl": 0.10629119873046874, |
| "learning_rate": 2e-07, |
| "loss": 0.02534767985343933, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4833333358168602, |
| "reward_std": 0.1652424544095993, |
| "rewards/MultiModalAccuracyORM": 0.4833333358168602, |
| "step": 1930, |
| "train_speed(iter/s)": 0.041128 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.9, |
| "epoch": 0.7818181818181819, |
| "grad_norm": 7.716069221496582, |
| "kl": 0.03204345703125, |
| "learning_rate": 2e-07, |
| "loss": 0.018103978037834166, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.3275222271680832, |
| "rewards/MultiModalAccuracyORM": 0.32500000596046447, |
| "step": 1935, |
| "train_speed(iter/s)": 0.041139 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.0, |
| "epoch": 0.7838383838383839, |
| "grad_norm": 1.998159408569336, |
| "kl": 0.2424346923828125, |
| "learning_rate": 2e-07, |
| "loss": -0.0022819479927420616, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666738688946, |
| "reward_std": 0.2526735752820969, |
| "rewards/MultiModalAccuracyORM": 0.3416666738688946, |
| "step": 1940, |
| "train_speed(iter/s)": 0.041144 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.25, |
| "epoch": 0.7858585858585858, |
| "grad_norm": 0.11755078285932541, |
| "kl": 0.1235809326171875, |
| "learning_rate": 2e-07, |
| "loss": 0.01756092607975006, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333358168602, |
| "reward_std": 0.16145119071006775, |
| "rewards/MultiModalAccuracyORM": 0.4083333358168602, |
| "step": 1945, |
| "train_speed(iter/s)": 0.041156 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.45, |
| "epoch": 0.7878787878787878, |
| "grad_norm": 11.287028312683105, |
| "kl": 0.05250396728515625, |
| "learning_rate": 2e-07, |
| "loss": -0.009032456576824189, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.36666667386889457, |
| "reward_std": 0.30639870166778566, |
| "rewards/MultiModalAccuracyORM": 0.36666667386889457, |
| "step": 1950, |
| "train_speed(iter/s)": 0.041159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.6, |
| "epoch": 0.7898989898989899, |
| "grad_norm": 0.1284160166978836, |
| "kl": 0.046563720703125, |
| "learning_rate": 2e-07, |
| "loss": 0.0006015380378812552, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.6250000067055226, |
| "reward_std": 0.1973894417285919, |
| "rewards/MultiModalAccuracyORM": 0.6250000067055226, |
| "step": 1955, |
| "train_speed(iter/s)": 0.041172 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 20.05, |
| "epoch": 0.7919191919191919, |
| "grad_norm": 0.5048889517784119, |
| "kl": 0.0877197265625, |
| "learning_rate": 2e-07, |
| "loss": 0.0017469068989157677, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.125, |
| "reward_std": 0.045226702094078065, |
| "rewards/MultiModalAccuracyORM": 0.125, |
| "step": 1960, |
| "train_speed(iter/s)": 0.041177 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 23.7, |
| "epoch": 0.793939393939394, |
| "grad_norm": 10.217628479003906, |
| "kl": 0.1369842529296875, |
| "learning_rate": 2e-07, |
| "loss": -0.007052314281463623, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4083333395421505, |
| "reward_std": 0.19968129992485045, |
| "rewards/MultiModalAccuracyORM": 0.4083333395421505, |
| "step": 1965, |
| "train_speed(iter/s)": 0.041181 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 31.1, |
| "epoch": 0.795959595959596, |
| "grad_norm": 15.147607803344727, |
| "kl": 0.139697265625, |
| "learning_rate": 2e-07, |
| "loss": -0.0005793333053588867, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000521540643, |
| "reward_std": 0.26928699016571045, |
| "rewards/MultiModalAccuracyORM": 0.30000000521540643, |
| "step": 1970, |
| "train_speed(iter/s)": 0.041179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.05, |
| "epoch": 0.797979797979798, |
| "grad_norm": 14.508552551269531, |
| "kl": 0.1334228515625, |
| "learning_rate": 2e-07, |
| "loss": 0.014681649208068848, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000447034836, |
| "reward_std": 0.31593895256519317, |
| "rewards/MultiModalAccuracyORM": 0.30000000447034836, |
| "step": 1975, |
| "train_speed(iter/s)": 0.041186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.4, |
| "epoch": 0.8, |
| "grad_norm": 14.245569229125977, |
| "kl": 0.07449951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.019247731566429137, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334252238275, |
| "reward_std": 0.256683474779129, |
| "rewards/MultiModalAccuracyORM": 0.23333334252238275, |
| "step": 1980, |
| "train_speed(iter/s)": 0.041204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.0, |
| "epoch": 0.802020202020202, |
| "grad_norm": 0.06112133339047432, |
| "kl": 0.09664306640625, |
| "learning_rate": 2e-07, |
| "loss": -0.010070499032735825, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45000000223517417, |
| "reward_std": 0.17555411159992218, |
| "rewards/MultiModalAccuracyORM": 0.45000000223517417, |
| "step": 1985, |
| "train_speed(iter/s)": 0.04121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 51.2, |
| "epoch": 0.804040404040404, |
| "grad_norm": 0.20859137177467346, |
| "kl": 0.2631103515625, |
| "learning_rate": 2e-07, |
| "loss": -0.03446192741394043, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666679084301, |
| "reward_std": 0.15194410383701323, |
| "rewards/MultiModalAccuracyORM": 0.2666666679084301, |
| "step": 1990, |
| "train_speed(iter/s)": 0.041215 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.6, |
| "epoch": 0.806060606060606, |
| "grad_norm": 2.347874879837036, |
| "kl": 0.09171142578125, |
| "learning_rate": 2e-07, |
| "loss": 0.003209712356328964, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4833333395421505, |
| "reward_std": 0.1770799547433853, |
| "rewards/MultiModalAccuracyORM": 0.4833333395421505, |
| "step": 1995, |
| "train_speed(iter/s)": 0.041215 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 12.103494644165039, |
| "learning_rate": 2e-07, |
| "loss": 0.051232755184173584, |
| "memory(GiB)": 104.49, |
| "step": 2000, |
| "train_speed(iter/s)": 0.041214 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 32.68000123023987, |
| "eval_kl": 0.1109576416015625, |
| "eval_loss": 0.001846806495450437, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.4066666714847088, |
| "eval_reward_std": 0.1827806031703949, |
| "eval_rewards/MultiModalAccuracyORM": 0.4066666714847088, |
| "eval_runtime": 274.3294, |
| "eval_samples_per_second": 0.182, |
| "eval_steps_per_second": 0.018, |
| "step": 2000 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.275, |
| "epoch": 0.8101010101010101, |
| "grad_norm": 20.72494888305664, |
| "kl": 0.09075469970703125, |
| "learning_rate": 2e-07, |
| "loss": 0.01332613080739975, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667200624942, |
| "reward_std": 0.22227564305067063, |
| "rewards/MultiModalAccuracyORM": 0.21666667200624942, |
| "step": 2005, |
| "train_speed(iter/s)": 0.04093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.2, |
| "epoch": 0.8121212121212121, |
| "grad_norm": 10.545307159423828, |
| "kl": 0.157220458984375, |
| "learning_rate": 2e-07, |
| "loss": 0.02192305028438568, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2666666716337204, |
| "reward_std": 0.21999078691005708, |
| "rewards/MultiModalAccuracyORM": 0.2666666716337204, |
| "step": 2010, |
| "train_speed(iter/s)": 0.040938 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.65, |
| "epoch": 0.8141414141414142, |
| "grad_norm": 0.1491260975599289, |
| "kl": 0.1144989013671875, |
| "learning_rate": 2e-07, |
| "loss": 0.021004287898540495, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.40000000298023225, |
| "reward_std": 0.17456946671009063, |
| "rewards/MultiModalAccuracyORM": 0.40000000298023225, |
| "step": 2015, |
| "train_speed(iter/s)": 0.040944 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.6, |
| "epoch": 0.8161616161616162, |
| "grad_norm": 19.212770462036133, |
| "kl": 0.0832275390625, |
| "learning_rate": 2e-07, |
| "loss": 0.004856839030981064, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500001043081285, |
| "reward_std": 0.2800416827201843, |
| "rewards/MultiModalAccuracyORM": 0.27500001043081285, |
| "step": 2020, |
| "train_speed(iter/s)": 0.040949 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 20.2, |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.25410985946655273, |
| "kl": 0.129962158203125, |
| "learning_rate": 2e-07, |
| "loss": 0.016422802209854127, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5250000059604645, |
| "reward_std": 0.09041781425476074, |
| "rewards/MultiModalAccuracyORM": 0.5250000059604645, |
| "step": 2025, |
| "train_speed(iter/s)": 0.040961 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.25, |
| "epoch": 0.8202020202020202, |
| "grad_norm": 6.931528568267822, |
| "kl": 0.260528564453125, |
| "learning_rate": 2e-07, |
| "loss": -0.02277086079120636, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5583333410322666, |
| "reward_std": 0.2526735752820969, |
| "rewards/MultiModalAccuracyORM": 0.5583333410322666, |
| "step": 2030, |
| "train_speed(iter/s)": 0.040965 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.7, |
| "epoch": 0.8222222222222222, |
| "grad_norm": 27.311315536499023, |
| "kl": 0.07995872497558594, |
| "learning_rate": 2e-07, |
| "loss": 0.024982047080993653, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666753590107, |
| "reward_std": 0.3019101768732071, |
| "rewards/MultiModalAccuracyORM": 0.3416666753590107, |
| "step": 2035, |
| "train_speed(iter/s)": 0.040969 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 25.5, |
| "epoch": 0.8242424242424242, |
| "grad_norm": 0.08455629646778107, |
| "kl": 0.1283721923828125, |
| "learning_rate": 2e-07, |
| "loss": 0.007968991994857788, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2083333395421505, |
| "reward_std": 0.2167353242635727, |
| "rewards/MultiModalAccuracyORM": 0.2083333395421505, |
| "step": 2040, |
| "train_speed(iter/s)": 0.040978 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.7, |
| "epoch": 0.8262626262626263, |
| "grad_norm": 0.012692108750343323, |
| "kl": 0.06329593658447266, |
| "learning_rate": 2e-07, |
| "loss": 0.019880211353302, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4750000022351742, |
| "reward_std": 0.1559540092945099, |
| "rewards/MultiModalAccuracyORM": 0.4750000022351742, |
| "step": 2045, |
| "train_speed(iter/s)": 0.040984 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.25, |
| "epoch": 0.8282828282828283, |
| "grad_norm": 0.49161991477012634, |
| "kl": 0.041827392578125, |
| "learning_rate": 2e-07, |
| "loss": 0.023220118880271912, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333358168602, |
| "reward_std": 0.13882583379745483, |
| "rewards/MultiModalAccuracyORM": 0.2583333358168602, |
| "step": 2050, |
| "train_speed(iter/s)": 0.040982 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.25, |
| "epoch": 0.8303030303030303, |
| "grad_norm": 3.920830249786377, |
| "kl": 0.130963134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.012984590232372284, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666753590107, |
| "reward_std": 0.3127244710922241, |
| "rewards/MultiModalAccuracyORM": 0.3666666753590107, |
| "step": 2055, |
| "train_speed(iter/s)": 0.040988 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.95, |
| "epoch": 0.8323232323232324, |
| "grad_norm": 2.618926763534546, |
| "kl": 0.0820068359375, |
| "learning_rate": 2e-07, |
| "loss": -0.0011547774076461792, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666738688946, |
| "reward_std": 0.174509859085083, |
| "rewards/MultiModalAccuracyORM": 0.3166666738688946, |
| "step": 2060, |
| "train_speed(iter/s)": 0.040997 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 4.05, |
| "epoch": 0.8343434343434344, |
| "grad_norm": 21.554759979248047, |
| "kl": 0.2670654296875, |
| "learning_rate": 2e-07, |
| "loss": 0.008714067935943603, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000059604645, |
| "reward_std": 0.22781596183776856, |
| "rewards/MultiModalAccuracyORM": 0.2750000059604645, |
| "step": 2065, |
| "train_speed(iter/s)": 0.041007 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.4, |
| "epoch": 0.8363636363636363, |
| "grad_norm": 0.038795698434114456, |
| "kl": 0.09162445068359375, |
| "learning_rate": 2e-07, |
| "loss": 0.01877760738134384, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2416666679084301, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.2416666679084301, |
| "step": 2070, |
| "train_speed(iter/s)": 0.041016 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.65, |
| "epoch": 0.8383838383838383, |
| "grad_norm": 0.5922779440879822, |
| "kl": 0.17679443359375, |
| "learning_rate": 2e-07, |
| "loss": 0.007905527949333191, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5, |
| "reward_std": 0.0, |
| "rewards/MultiModalAccuracyORM": 0.5, |
| "step": 2075, |
| "train_speed(iter/s)": 0.041029 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.05, |
| "epoch": 0.8404040404040404, |
| "grad_norm": 0.48757824301719666, |
| "kl": 0.1322998046875, |
| "learning_rate": 2e-07, |
| "loss": 0.006768345832824707, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5333333358168602, |
| "reward_std": 0.1356445163488388, |
| "rewards/MultiModalAccuracyORM": 0.5333333358168602, |
| "step": 2080, |
| "train_speed(iter/s)": 0.04104 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 8.2, |
| "epoch": 0.8424242424242424, |
| "grad_norm": 7.100019931793213, |
| "kl": 0.09602890014648438, |
| "learning_rate": 2e-07, |
| "loss": -0.010533835738897324, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833334103226663, |
| "reward_std": 0.22001479864120482, |
| "rewards/MultiModalAccuracyORM": 0.25833334103226663, |
| "step": 2085, |
| "train_speed(iter/s)": 0.041047 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.15, |
| "epoch": 0.8444444444444444, |
| "grad_norm": 10.953103065490723, |
| "kl": 0.205523681640625, |
| "learning_rate": 2e-07, |
| "loss": 0.07547287940979004, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333333805203438, |
| "reward_std": 0.16852193474769592, |
| "rewards/MultiModalAccuracyORM": 0.23333333805203438, |
| "step": 2090, |
| "train_speed(iter/s)": 0.041052 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.75, |
| "epoch": 0.8464646464646465, |
| "grad_norm": 4.194830894470215, |
| "kl": 0.0806396484375, |
| "learning_rate": 2e-07, |
| "loss": -0.017879560589790344, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3916666693985462, |
| "reward_std": 0.11702905893325806, |
| "rewards/MultiModalAccuracyORM": 0.3916666693985462, |
| "step": 2095, |
| "train_speed(iter/s)": 0.041059 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 23.75, |
| "epoch": 0.8484848484848485, |
| "grad_norm": 0.12948361039161682, |
| "kl": 0.13734283447265624, |
| "learning_rate": 2e-07, |
| "loss": -0.01447494924068451, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4750000111758709, |
| "reward_std": 0.2338038980960846, |
| "rewards/MultiModalAccuracyORM": 0.4750000111758709, |
| "step": 2100, |
| "train_speed(iter/s)": 0.041064 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.05, |
| "epoch": 0.8505050505050505, |
| "grad_norm": 31.3735294342041, |
| "kl": 0.177423095703125, |
| "learning_rate": 2e-07, |
| "loss": -0.0017697295174002648, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.243092343211174, |
| "rewards/MultiModalAccuracyORM": 0.25000000521540644, |
| "step": 2105, |
| "train_speed(iter/s)": 0.041067 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 4.65, |
| "epoch": 0.8525252525252526, |
| "grad_norm": 2.228029251098633, |
| "kl": 0.14156494140625, |
| "learning_rate": 2e-07, |
| "loss": -0.010953420400619506, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5166666679084301, |
| "reward_std": 0.22297748625278474, |
| "rewards/MultiModalAccuracyORM": 0.5166666679084301, |
| "step": 2110, |
| "train_speed(iter/s)": 0.041074 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.35, |
| "epoch": 0.8545454545454545, |
| "grad_norm": 0.3235064446926117, |
| "kl": 0.19440174102783203, |
| "learning_rate": 2e-07, |
| "loss": -0.010122859477996826, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2750000074505806, |
| "reward_std": 0.1888910174369812, |
| "rewards/MultiModalAccuracyORM": 0.2750000074505806, |
| "step": 2115, |
| "train_speed(iter/s)": 0.041074 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.1, |
| "epoch": 0.8565656565656565, |
| "grad_norm": 9.72260856628418, |
| "kl": 0.18918914794921876, |
| "learning_rate": 2e-07, |
| "loss": 0.0024737130850553514, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45833334028720857, |
| "reward_std": 0.16925235390663146, |
| "rewards/MultiModalAccuracyORM": 0.45833334028720857, |
| "step": 2120, |
| "train_speed(iter/s)": 0.041072 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 42.2, |
| "epoch": 0.8585858585858586, |
| "grad_norm": 9.817282676696777, |
| "kl": 0.299951171875, |
| "learning_rate": 2e-07, |
| "loss": 0.00935778021812439, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.17500000074505806, |
| "reward_std": 0.15824586153030396, |
| "rewards/MultiModalAccuracyORM": 0.17500000074505806, |
| "step": 2125, |
| "train_speed(iter/s)": 0.041077 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.6, |
| "epoch": 0.8606060606060606, |
| "grad_norm": 0.3442615568637848, |
| "kl": 0.17735595703125, |
| "learning_rate": 2e-07, |
| "loss": 0.006759631633758545, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.48333334028720853, |
| "reward_std": 0.2730426698923111, |
| "rewards/MultiModalAccuracyORM": 0.48333334028720853, |
| "step": 2130, |
| "train_speed(iter/s)": 0.041087 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 79.15, |
| "epoch": 0.8626262626262626, |
| "grad_norm": 0.520937979221344, |
| "kl": 0.07093505859375, |
| "learning_rate": 2e-07, |
| "loss": -0.012908129394054413, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000001043081284, |
| "reward_std": 0.1996457099914551, |
| "rewards/MultiModalAccuracyORM": 0.20000001043081284, |
| "step": 2135, |
| "train_speed(iter/s)": 0.041077 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.9, |
| "epoch": 0.8646464646464647, |
| "grad_norm": 1.4221155643463135, |
| "kl": 0.132373046875, |
| "learning_rate": 2e-07, |
| "loss": -0.07007729411125183, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.21422483026981354, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 2140, |
| "train_speed(iter/s)": 0.041076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.4, |
| "epoch": 0.8666666666666667, |
| "grad_norm": 19.47251319885254, |
| "kl": 0.0463134765625, |
| "learning_rate": 2e-07, |
| "loss": 0.02097744941711426, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3666666723787785, |
| "reward_std": 0.25897533297538755, |
| "rewards/MultiModalAccuracyORM": 0.3666666723787785, |
| "step": 2145, |
| "train_speed(iter/s)": 0.041079 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 35.05, |
| "epoch": 0.8686868686868687, |
| "grad_norm": 0.0365481972694397, |
| "kl": 0.07025909423828125, |
| "learning_rate": 2e-07, |
| "loss": -0.00900230035185814, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5666666716337204, |
| "reward_std": 0.1295969843864441, |
| "rewards/MultiModalAccuracyORM": 0.5666666716337204, |
| "step": 2150, |
| "train_speed(iter/s)": 0.041076 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.55, |
| "epoch": 0.8707070707070707, |
| "grad_norm": 3.220684051513672, |
| "kl": 0.11529541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.05271543264389038, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.23333334177732468, |
| "reward_std": 0.3222196638584137, |
| "rewards/MultiModalAccuracyORM": 0.23333334177732468, |
| "step": 2155, |
| "train_speed(iter/s)": 0.041085 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 30.5, |
| "epoch": 0.8727272727272727, |
| "grad_norm": 21.94721031188965, |
| "kl": 0.1160491943359375, |
| "learning_rate": 2e-07, |
| "loss": 0.0024079522117972374, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20000000447034835, |
| "reward_std": 0.24009110629558564, |
| "rewards/MultiModalAccuracyORM": 0.20000000447034835, |
| "step": 2160, |
| "train_speed(iter/s)": 0.04109 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.7, |
| "epoch": 0.8747474747474747, |
| "grad_norm": 20.038494110107422, |
| "kl": 0.1658905029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.04994232654571533, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.533333345502615, |
| "reward_std": 0.325963220000267, |
| "rewards/MultiModalAccuracyORM": 0.533333345502615, |
| "step": 2165, |
| "train_speed(iter/s)": 0.041101 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 28.45, |
| "epoch": 0.8767676767676768, |
| "grad_norm": 2.1534128189086914, |
| "kl": 0.0698028564453125, |
| "learning_rate": 2e-07, |
| "loss": -0.025438961386680604, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.21666667014360427, |
| "reward_std": 0.21524804830551147, |
| "rewards/MultiModalAccuracyORM": 0.21666667014360427, |
| "step": 2170, |
| "train_speed(iter/s)": 0.041105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.25, |
| "epoch": 0.8787878787878788, |
| "grad_norm": 6.415175437927246, |
| "kl": 0.0926239013671875, |
| "learning_rate": 2e-07, |
| "loss": -0.007227879762649536, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.44166667610406873, |
| "reward_std": 0.32300969064235685, |
| "rewards/MultiModalAccuracyORM": 0.44166667610406873, |
| "step": 2175, |
| "train_speed(iter/s)": 0.041115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.3, |
| "epoch": 0.8808080808080808, |
| "grad_norm": 0.38973256945610046, |
| "kl": 0.13404541015625, |
| "learning_rate": 2e-07, |
| "loss": 0.023914989829063416, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4000000059604645, |
| "reward_std": 0.1896214485168457, |
| "rewards/MultiModalAccuracyORM": 0.4000000059604645, |
| "step": 2180, |
| "train_speed(iter/s)": 0.041124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.9, |
| "epoch": 0.8828282828282829, |
| "grad_norm": 0.12656661868095398, |
| "kl": 0.15858612060546876, |
| "learning_rate": 2e-07, |
| "loss": 0.008176784217357635, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666679084301, |
| "reward_std": 0.07810411453247071, |
| "rewards/MultiModalAccuracyORM": 0.3416666679084301, |
| "step": 2185, |
| "train_speed(iter/s)": 0.041129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 61.4, |
| "epoch": 0.8848484848484849, |
| "grad_norm": 2.246829032897949, |
| "kl": 0.05509033203125, |
| "learning_rate": 2e-07, |
| "loss": 0.0310079425573349, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666708886623, |
| "reward_std": 0.22704698145389557, |
| "rewards/MultiModalAccuracyORM": 0.3416666708886623, |
| "step": 2190, |
| "train_speed(iter/s)": 0.041118 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.15, |
| "epoch": 0.8868686868686869, |
| "grad_norm": 0.3648838996887207, |
| "kl": 0.1862060546875, |
| "learning_rate": 2e-07, |
| "loss": -0.014291207492351531, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4416666813194752, |
| "reward_std": 0.29006352424621584, |
| "rewards/MultiModalAccuracyORM": 0.4416666813194752, |
| "step": 2195, |
| "train_speed(iter/s)": 0.041124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.0, |
| "epoch": 0.8888888888888888, |
| "grad_norm": 5.710547924041748, |
| "kl": 0.1601806640625, |
| "learning_rate": 2e-07, |
| "loss": 0.0010113120079040527, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666708886623, |
| "reward_std": 0.15824586153030396, |
| "rewards/MultiModalAccuracyORM": 0.3416666708886623, |
| "step": 2200, |
| "train_speed(iter/s)": 0.041137 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.85, |
| "epoch": 0.8909090909090909, |
| "grad_norm": 0.11420593410730362, |
| "kl": 0.1681976318359375, |
| "learning_rate": 2e-07, |
| "loss": 0.0913887619972229, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1083333358168602, |
| "reward_std": 0.18262484967708587, |
| "rewards/MultiModalAccuracyORM": 0.1083333358168602, |
| "step": 2205, |
| "train_speed(iter/s)": 0.041132 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 28.25, |
| "epoch": 0.8929292929292929, |
| "grad_norm": 21.853090286254883, |
| "kl": 0.10088920593261719, |
| "learning_rate": 2e-07, |
| "loss": 0.0005557646509259939, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.47500000819563865, |
| "reward_std": 0.16451202929019929, |
| "rewards/MultiModalAccuracyORM": 0.47500000819563865, |
| "step": 2210, |
| "train_speed(iter/s)": 0.041127 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 34.95, |
| "epoch": 0.8949494949494949, |
| "grad_norm": 0.11827383190393448, |
| "kl": 0.1541900634765625, |
| "learning_rate": 2e-07, |
| "loss": 0.0488810658454895, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3000000059604645, |
| "reward_std": 0.22625695466995238, |
| "rewards/MultiModalAccuracyORM": 0.3000000059604645, |
| "step": 2215, |
| "train_speed(iter/s)": 0.041123 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 42.2, |
| "epoch": 0.896969696969697, |
| "grad_norm": 10.474591255187988, |
| "kl": 0.10148773193359376, |
| "learning_rate": 2e-07, |
| "loss": -0.004365795105695724, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25000000521540644, |
| "reward_std": 0.25591449439525604, |
| "rewards/MultiModalAccuracyORM": 0.25000000521540644, |
| "step": 2220, |
| "train_speed(iter/s)": 0.041126 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 40.2, |
| "epoch": 0.898989898989899, |
| "grad_norm": 0.02211969904601574, |
| "kl": 0.0304107666015625, |
| "learning_rate": 2e-07, |
| "loss": 0.0038854777812957764, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.233333333581686, |
| "reward_std": 0.07409421503543853, |
| "rewards/MultiModalAccuracyORM": 0.233333333581686, |
| "step": 2225, |
| "train_speed(iter/s)": 0.041135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.35, |
| "epoch": 0.901010101010101, |
| "grad_norm": 11.09273910522461, |
| "kl": 0.1110137939453125, |
| "learning_rate": 2e-07, |
| "loss": 0.0425330251455307, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.1000000037252903, |
| "reward_std": 0.16852192878723143, |
| "rewards/MultiModalAccuracyORM": 0.1000000037252903, |
| "step": 2230, |
| "train_speed(iter/s)": 0.041144 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 14.8, |
| "epoch": 0.9030303030303031, |
| "grad_norm": 17.634380340576172, |
| "kl": 0.215167236328125, |
| "learning_rate": 2e-07, |
| "loss": 0.03751255869865418, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3833333358168602, |
| "reward_std": 0.10697162747383118, |
| "rewards/MultiModalAccuracyORM": 0.3833333358168602, |
| "step": 2235, |
| "train_speed(iter/s)": 0.041143 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.8, |
| "epoch": 0.9050505050505051, |
| "grad_norm": 0.31089159846305847, |
| "kl": 0.1969482421875, |
| "learning_rate": 2e-07, |
| "loss": 0.011410205066204071, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666738688946, |
| "reward_std": 0.18108985424041749, |
| "rewards/MultiModalAccuracyORM": 0.3416666738688946, |
| "step": 2240, |
| "train_speed(iter/s)": 0.041157 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 22.1, |
| "epoch": 0.907070707070707, |
| "grad_norm": 0.033987369388341904, |
| "kl": 0.1924041748046875, |
| "learning_rate": 2e-07, |
| "loss": 0.0015319785103201865, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.22500000298023223, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.22500000298023223, |
| "step": 2245, |
| "train_speed(iter/s)": 0.041165 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.06531964987516403, |
| "learning_rate": 2e-07, |
| "loss": -0.01111970990896225, |
| "memory(GiB)": 104.49, |
| "step": 2250, |
| "train_speed(iter/s)": 0.041175 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 33.406667890548704, |
| "eval_kl": 0.133411865234375, |
| "eval_loss": -0.00466223806142807, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.441666671782732, |
| "eval_reward_std": 0.1628412437438965, |
| "eval_rewards/MultiModalAccuracyORM": 0.441666671782732, |
| "eval_runtime": 272.4154, |
| "eval_samples_per_second": 0.184, |
| "eval_steps_per_second": 0.018, |
| "step": 2250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 23.525, |
| "epoch": 0.9111111111111111, |
| "grad_norm": 0.03232080861926079, |
| "kl": 0.22264862060546875, |
| "learning_rate": 2e-07, |
| "loss": 0.028143799304962157, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4166666720062494, |
| "reward_std": 0.12746492475271226, |
| "rewards/MultiModalAccuracyORM": 0.4166666720062494, |
| "step": 2255, |
| "train_speed(iter/s)": 0.040921 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.4, |
| "epoch": 0.9131313131313131, |
| "grad_norm": 0.06567571312189102, |
| "kl": 0.08049087524414063, |
| "learning_rate": 2e-07, |
| "loss": 0.031807747483253476, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.45833333507180213, |
| "reward_std": 0.14564234614372254, |
| "rewards/MultiModalAccuracyORM": 0.45833333507180213, |
| "step": 2260, |
| "train_speed(iter/s)": 0.040927 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 21.6, |
| "epoch": 0.9151515151515152, |
| "grad_norm": 0.668204665184021, |
| "kl": 0.1173919677734375, |
| "learning_rate": 2e-07, |
| "loss": -0.03886902332305908, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333333432674407, |
| "reward_std": 0.16830329298973085, |
| "rewards/MultiModalAccuracyORM": 0.28333333432674407, |
| "step": 2265, |
| "train_speed(iter/s)": 0.040936 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 44.0, |
| "epoch": 0.9171717171717172, |
| "grad_norm": 0.16663120687007904, |
| "kl": 0.1005615234375, |
| "learning_rate": 2e-07, |
| "loss": 0.011882781982421875, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3083333380520344, |
| "reward_std": 0.2659719318151474, |
| "rewards/MultiModalAccuracyORM": 0.3083333380520344, |
| "step": 2270, |
| "train_speed(iter/s)": 0.040942 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.5, |
| "epoch": 0.9191919191919192, |
| "grad_norm": 18.440631866455078, |
| "kl": 0.121307373046875, |
| "learning_rate": 2e-07, |
| "loss": 0.01434231996536255, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.33333333805203436, |
| "reward_std": 0.17075418531894684, |
| "rewards/MultiModalAccuracyORM": 0.33333333805203436, |
| "step": 2275, |
| "train_speed(iter/s)": 0.040956 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.25, |
| "epoch": 0.9212121212121213, |
| "grad_norm": 30.9835147857666, |
| "kl": 0.118048095703125, |
| "learning_rate": 2e-07, |
| "loss": 0.012100108712911607, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3583333373069763, |
| "reward_std": 0.17781037986278533, |
| "rewards/MultiModalAccuracyORM": 0.3583333373069763, |
| "step": 2280, |
| "train_speed(iter/s)": 0.040966 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.3, |
| "epoch": 0.9232323232323232, |
| "grad_norm": 6.152209758758545, |
| "kl": 0.3557861328125, |
| "learning_rate": 2e-07, |
| "loss": -0.025510752201080324, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000074505806, |
| "reward_std": 0.1652424544095993, |
| "rewards/MultiModalAccuracyORM": 0.30000000074505806, |
| "step": 2285, |
| "train_speed(iter/s)": 0.040978 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 47.3, |
| "epoch": 0.9252525252525252, |
| "grad_norm": 22.69240951538086, |
| "kl": 0.155462646484375, |
| "learning_rate": 2e-07, |
| "loss": -0.0011336962692439557, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.21422483026981354, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 2290, |
| "train_speed(iter/s)": 0.040975 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.15, |
| "epoch": 0.9272727272727272, |
| "grad_norm": 0.06437839567661285, |
| "kl": 0.16920166015625, |
| "learning_rate": 2e-07, |
| "loss": 0.0063018262386322025, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333730697633, |
| "reward_std": 0.07810411453247071, |
| "rewards/MultiModalAccuracyORM": 0.25833333730697633, |
| "step": 2295, |
| "train_speed(iter/s)": 0.040991 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 56.8, |
| "epoch": 0.9292929292929293, |
| "grad_norm": 0.896676778793335, |
| "kl": 0.12425537109375, |
| "learning_rate": 2e-07, |
| "loss": 0.01196231171488762, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2916666716337204, |
| "reward_std": 0.29564401507377625, |
| "rewards/MultiModalAccuracyORM": 0.2916666716337204, |
| "step": 2300, |
| "train_speed(iter/s)": 0.040994 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 16.8, |
| "epoch": 0.9313131313131313, |
| "grad_norm": 1.9378466606140137, |
| "kl": 0.067706298828125, |
| "learning_rate": 2e-07, |
| "loss": -0.021140041947364806, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333410322666, |
| "reward_std": 0.2504173070192337, |
| "rewards/MultiModalAccuracyORM": 0.4333333410322666, |
| "step": 2305, |
| "train_speed(iter/s)": 0.041006 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.95, |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.4809723496437073, |
| "kl": 0.1289764404296875, |
| "learning_rate": 2e-07, |
| "loss": 0.003021649643778801, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.10833333730697632, |
| "reward_std": 0.2071926474571228, |
| "rewards/MultiModalAccuracyORM": 0.10833333730697632, |
| "step": 2310, |
| "train_speed(iter/s)": 0.041021 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 42.75, |
| "epoch": 0.9353535353535354, |
| "grad_norm": 0.06879542768001556, |
| "kl": 0.09110107421875, |
| "learning_rate": 2e-07, |
| "loss": -0.004359513521194458, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.47500000298023226, |
| "reward_std": 0.2159808874130249, |
| "rewards/MultiModalAccuracyORM": 0.47500000298023226, |
| "step": 2315, |
| "train_speed(iter/s)": 0.041036 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 13.2, |
| "epoch": 0.9373737373737374, |
| "grad_norm": 0.226049542427063, |
| "kl": 0.09764404296875, |
| "learning_rate": 2e-07, |
| "loss": 0.0010025198571383953, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.20833333805203438, |
| "reward_std": 0.12552748322486879, |
| "rewards/MultiModalAccuracyORM": 0.20833333805203438, |
| "step": 2320, |
| "train_speed(iter/s)": 0.04105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.95, |
| "epoch": 0.9393939393939394, |
| "grad_norm": 7.9168314933776855, |
| "kl": 0.0877655029296875, |
| "learning_rate": 2e-07, |
| "loss": 0.06811027526855469, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5333333387970924, |
| "reward_std": 0.21149236261844634, |
| "rewards/MultiModalAccuracyORM": 0.5333333387970924, |
| "step": 2325, |
| "train_speed(iter/s)": 0.041062 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 10.2, |
| "epoch": 0.9414141414141414, |
| "grad_norm": 0.2699204385280609, |
| "kl": 0.180908203125, |
| "learning_rate": 2e-07, |
| "loss": 0.0060350816696882244, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.27500000447034834, |
| "reward_std": 0.09041781425476074, |
| "rewards/MultiModalAccuracyORM": 0.27500000447034834, |
| "step": 2330, |
| "train_speed(iter/s)": 0.041071 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.55, |
| "epoch": 0.9434343434343434, |
| "grad_norm": 27.749364852905273, |
| "kl": 0.22237548828125, |
| "learning_rate": 2e-07, |
| "loss": 0.08456591367721558, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.29166667312383654, |
| "reward_std": 0.1888910174369812, |
| "rewards/MultiModalAccuracyORM": 0.29166667312383654, |
| "step": 2335, |
| "train_speed(iter/s)": 0.041075 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 69.25, |
| "epoch": 0.9454545454545454, |
| "grad_norm": 5.552628517150879, |
| "kl": 0.0543975830078125, |
| "learning_rate": 2e-07, |
| "loss": -0.05388938784599304, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.07500000149011612, |
| "reward_std": 0.19962169826030732, |
| "rewards/MultiModalAccuracyORM": 0.07500000149011612, |
| "step": 2340, |
| "train_speed(iter/s)": 0.041078 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.7, |
| "epoch": 0.9474747474747475, |
| "grad_norm": 9.49284839630127, |
| "kl": 0.11671142578125, |
| "learning_rate": 2e-07, |
| "loss": -0.00172628965228796, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.19166667014360428, |
| "reward_std": 0.22629254460334777, |
| "rewards/MultiModalAccuracyORM": 0.19166667014360428, |
| "step": 2345, |
| "train_speed(iter/s)": 0.04109 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 56.05, |
| "epoch": 0.9494949494949495, |
| "grad_norm": 3.0689406394958496, |
| "kl": 0.09317855834960938, |
| "learning_rate": 2e-07, |
| "loss": 0.013809925317764283, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.31666667088866235, |
| "reward_std": 0.3167103588581085, |
| "rewards/MultiModalAccuracyORM": 0.31666667088866235, |
| "step": 2350, |
| "train_speed(iter/s)": 0.041098 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.8, |
| "epoch": 0.9515151515151515, |
| "grad_norm": 0.1557140052318573, |
| "kl": 0.280633544921875, |
| "learning_rate": 2e-07, |
| "loss": -0.00421304777264595, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666666865348817, |
| "reward_std": 0.07409421503543853, |
| "rewards/MultiModalAccuracyORM": 0.41666666865348817, |
| "step": 2355, |
| "train_speed(iter/s)": 0.041108 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 30.65, |
| "epoch": 0.9535353535353536, |
| "grad_norm": 7.580443382263184, |
| "kl": 0.096343994140625, |
| "learning_rate": 2e-07, |
| "loss": -0.022874367237091065, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.6333333358168602, |
| "reward_std": 0.17861495018005372, |
| "rewards/MultiModalAccuracyORM": 0.6333333358168602, |
| "step": 2360, |
| "train_speed(iter/s)": 0.041118 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 12.8, |
| "epoch": 0.9555555555555556, |
| "grad_norm": 0.11349290609359741, |
| "kl": 0.21148681640625, |
| "learning_rate": 2e-07, |
| "loss": -6.924470653757453e-05, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5166666708886624, |
| "reward_std": 0.1840525358915329, |
| "rewards/MultiModalAccuracyORM": 0.5166666708886624, |
| "step": 2365, |
| "train_speed(iter/s)": 0.041134 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.8, |
| "epoch": 0.9575757575757575, |
| "grad_norm": 16.9438419342041, |
| "kl": 0.20333251953125, |
| "learning_rate": 2e-07, |
| "loss": 0.008581924438476562, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.41666666939854624, |
| "reward_std": 0.16225576102733613, |
| "rewards/MultiModalAccuracyORM": 0.41666666939854624, |
| "step": 2370, |
| "train_speed(iter/s)": 0.041147 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.7, |
| "epoch": 0.9595959595959596, |
| "grad_norm": 26.406293869018555, |
| "kl": 0.1427520751953125, |
| "learning_rate": 2e-07, |
| "loss": 0.011251689493656158, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500000596046447, |
| "reward_std": 0.23866584599018098, |
| "rewards/MultiModalAccuracyORM": 0.32500000596046447, |
| "step": 2375, |
| "train_speed(iter/s)": 0.041156 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 44.35, |
| "epoch": 0.9616161616161616, |
| "grad_norm": 0.03468816727399826, |
| "kl": 0.09806137084960938, |
| "learning_rate": 2e-07, |
| "loss": 0.008510185778141022, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5083333373069763, |
| "reward_std": 0.1037161648273468, |
| "rewards/MultiModalAccuracyORM": 0.5083333373069763, |
| "step": 2380, |
| "train_speed(iter/s)": 0.041159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.5, |
| "epoch": 0.9636363636363636, |
| "grad_norm": 12.14474105834961, |
| "kl": 0.127783203125, |
| "learning_rate": 2e-07, |
| "loss": 0.031885528564453126, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.25833333656191826, |
| "reward_std": 0.25566026866436004, |
| "rewards/MultiModalAccuracyORM": 0.25833333656191826, |
| "step": 2385, |
| "train_speed(iter/s)": 0.041174 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 19.1, |
| "epoch": 0.9656565656565657, |
| "grad_norm": 0.8151546716690063, |
| "kl": 0.13538818359375, |
| "learning_rate": 2e-07, |
| "loss": 0.012065254151821136, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.233333333581686, |
| "reward_std": 0.07409421503543853, |
| "rewards/MultiModalAccuracyORM": 0.233333333581686, |
| "step": 2390, |
| "train_speed(iter/s)": 0.041185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 46.45, |
| "epoch": 0.9676767676767677, |
| "grad_norm": 22.97179412841797, |
| "kl": 0.0504150390625, |
| "learning_rate": 2e-07, |
| "loss": 0.00892886370420456, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.32500001043081284, |
| "reward_std": 0.386316055059433, |
| "rewards/MultiModalAccuracyORM": 0.32500001043081284, |
| "step": 2395, |
| "train_speed(iter/s)": 0.041191 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 15.7, |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.13443566858768463, |
| "kl": 0.0746551513671875, |
| "learning_rate": 2e-07, |
| "loss": -0.008957084268331528, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.05833333507180214, |
| "reward_std": 0.11702905893325806, |
| "rewards/MultiModalAccuracyORM": 0.05833333507180214, |
| "step": 2400, |
| "train_speed(iter/s)": 0.041198 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 5.3, |
| "epoch": 0.9717171717171718, |
| "grad_norm": 13.01309871673584, |
| "kl": 0.1608978271484375, |
| "learning_rate": 2e-07, |
| "loss": -0.005169375985860825, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4333333395421505, |
| "reward_std": 0.2074468731880188, |
| "rewards/MultiModalAccuracyORM": 0.4333333395421505, |
| "step": 2405, |
| "train_speed(iter/s)": 0.041211 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 65.4, |
| "epoch": 0.9737373737373738, |
| "grad_norm": 20.76219367980957, |
| "kl": 0.10498046875, |
| "learning_rate": 2e-07, |
| "loss": -0.026147454977035522, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3166666693985462, |
| "reward_std": 0.23933667540550232, |
| "rewards/MultiModalAccuracyORM": 0.3166666693985462, |
| "step": 2410, |
| "train_speed(iter/s)": 0.041217 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 17.3, |
| "epoch": 0.9757575757575757, |
| "grad_norm": 5.97620964050293, |
| "kl": 0.098968505859375, |
| "learning_rate": 2e-07, |
| "loss": 0.04436638355255127, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5916666783392429, |
| "reward_std": 0.26292563080787656, |
| "rewards/MultiModalAccuracyORM": 0.5916666783392429, |
| "step": 2415, |
| "train_speed(iter/s)": 0.041224 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.9, |
| "epoch": 0.9777777777777777, |
| "grad_norm": 0.16142967343330383, |
| "kl": 0.2656707763671875, |
| "learning_rate": 2e-07, |
| "loss": 0.010275793075561524, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4750000022351742, |
| "reward_std": 0.12558708488941192, |
| "rewards/MultiModalAccuracyORM": 0.4750000022351742, |
| "step": 2420, |
| "train_speed(iter/s)": 0.041233 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 29.5, |
| "epoch": 0.9797979797979798, |
| "grad_norm": 5.270585060119629, |
| "kl": 0.1023193359375, |
| "learning_rate": 2e-07, |
| "loss": 0.013689932227134705, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2583333417773247, |
| "reward_std": 0.2817953139543533, |
| "rewards/MultiModalAccuracyORM": 0.2583333417773247, |
| "step": 2425, |
| "train_speed(iter/s)": 0.041241 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 24.5, |
| "epoch": 0.9818181818181818, |
| "grad_norm": 2.2413382530212402, |
| "kl": 0.09530487060546874, |
| "learning_rate": 2e-07, |
| "loss": -0.009250025451183318, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5166666693985462, |
| "reward_std": 0.17150862216949464, |
| "rewards/MultiModalAccuracyORM": 0.5166666693985462, |
| "step": 2430, |
| "train_speed(iter/s)": 0.041258 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 7.1, |
| "epoch": 0.9838383838383838, |
| "grad_norm": 0.14606672525405884, |
| "kl": 0.098297119140625, |
| "learning_rate": 2e-07, |
| "loss": -0.021257255971431733, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.28333334252238274, |
| "reward_std": 0.19713521599769593, |
| "rewards/MultiModalAccuracyORM": 0.28333334252238274, |
| "step": 2435, |
| "train_speed(iter/s)": 0.041266 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 11.3, |
| "epoch": 0.9858585858585859, |
| "grad_norm": 2.6238768100738525, |
| "kl": 0.10804595947265624, |
| "learning_rate": 2e-07, |
| "loss": 0.007257813215255737, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.30000000149011613, |
| "reward_std": 0.1974250316619873, |
| "rewards/MultiModalAccuracyORM": 0.30000000149011613, |
| "step": 2440, |
| "train_speed(iter/s)": 0.041271 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 18.15, |
| "epoch": 0.9878787878787879, |
| "grad_norm": 0.03827716410160065, |
| "kl": 0.101373291015625, |
| "learning_rate": 2e-07, |
| "loss": 0.011828117072582245, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.4916666731238365, |
| "reward_std": 0.181566059589386, |
| "rewards/MultiModalAccuracyORM": 0.4916666731238365, |
| "step": 2445, |
| "train_speed(iter/s)": 0.041278 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 43.4, |
| "epoch": 0.98989898989899, |
| "grad_norm": 6.416419982910156, |
| "kl": 0.2295166015625, |
| "learning_rate": 2e-07, |
| "loss": 0.009897831082344054, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3750000029802322, |
| "reward_std": 0.10072947144508362, |
| "rewards/MultiModalAccuracyORM": 0.3750000029802322, |
| "step": 2450, |
| "train_speed(iter/s)": 0.041283 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 9.6, |
| "epoch": 0.9919191919191919, |
| "grad_norm": 3.0410783290863037, |
| "kl": 0.14271240234375, |
| "learning_rate": 2e-07, |
| "loss": -0.015740707516670227, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5083333380520344, |
| "reward_std": 0.27522478699684144, |
| "rewards/MultiModalAccuracyORM": 0.5083333380520344, |
| "step": 2455, |
| "train_speed(iter/s)": 0.041292 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.15, |
| "epoch": 0.9939393939393939, |
| "grad_norm": 0.742748498916626, |
| "kl": 0.2917930603027344, |
| "learning_rate": 2e-07, |
| "loss": 0.06221296787261963, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.2833333402872086, |
| "reward_std": 0.17702035307884217, |
| "rewards/MultiModalAccuracyORM": 0.2833333402872086, |
| "step": 2460, |
| "train_speed(iter/s)": 0.041301 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 39.7, |
| "epoch": 0.9959595959595959, |
| "grad_norm": 0.5455455780029297, |
| "kl": 0.1237335205078125, |
| "learning_rate": 2e-07, |
| "loss": 0.04647340774536133, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.5750000044703484, |
| "reward_std": 0.09041781425476074, |
| "rewards/MultiModalAccuracyORM": 0.5750000044703484, |
| "step": 2465, |
| "train_speed(iter/s)": 0.041305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 6.6, |
| "epoch": 0.997979797979798, |
| "grad_norm": 3.567203998565674, |
| "kl": 0.128204345703125, |
| "learning_rate": 2e-07, |
| "loss": -0.006601794809103012, |
| "memory(GiB)": 104.49, |
| "response_clip_ratio": 0.0, |
| "reward": 0.3416666753590107, |
| "reward_std": 0.3019101768732071, |
| "rewards/MultiModalAccuracyORM": 0.3416666753590107, |
| "step": 2470, |
| "train_speed(iter/s)": 0.04132 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 24.7083740234375, |
| "learning_rate": 2e-07, |
| "loss": 0.018315188586711884, |
| "memory(GiB)": 104.49, |
| "step": 2475, |
| "train_speed(iter/s)": 0.041332 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 28.336667232513427, |
| "eval_kl": 0.152705078125, |
| "eval_loss": 0.011019712314009666, |
| "eval_response_clip_ratio": 0.0, |
| "eval_reward": 0.4650000059604645, |
| "eval_reward_std": 0.1907379400730133, |
| "eval_rewards/MultiModalAccuracyORM": 0.4650000059604645, |
| "eval_runtime": 238.5041, |
| "eval_samples_per_second": 0.21, |
| "eval_steps_per_second": 0.021, |
| "step": 2475 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2475, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|